import datetime
import json
import time
import math
import re
import traceback
import urllib
from urllib import parse
import base64
import requests

from parsel import Selector
from re_common.baselibrary.database.mysql import json_update
from re_common.baselibrary.utils.basedict import BaseDicts
from re_common.baselibrary.utils.basetime import BaseTime
from re_common.baselibrary.utils.baseurl import BaseUrl
from re_common.vip.baseencodeid import BaseLngid

from apps.crawler_platform.core_platform.core_sql import CoreSqlValue
from apps.crawler_platform.core_platform.g_model import DealModel, CallBackModel, DealInsertModel, DealUpdateModel, \
    OperatorSqlModel, DealItemModel, \
    EtlDealModel, PolicyListModel, PolicyArticleModel

# Public API of this module: every crawler-platform callback entry point.
# Naming convention: policy_<agency><region>{list|list<n>|article|article_etl}_callback —
# one list / article / article-ETL trio per government website.
__all__ = [
    "policy_fgwguizhoulist_callback",
    "policy_fgwguizhouarticle_callback",
    "policy_fgwguizhouarticle_etl_callback",
    "policy_gxtguizhoulist_callback",
    "policy_gxtguizhouarticle_callback",
    "policy_gxtguizhouarticle_etl_callback",
    "policy_kjtguizhoulist_callback",
    "policy_kjtguizhouarticle_callback",
    "policy_kjtguizhouarticle_etl_callback",
    "policy_jytguizhoulist_callback",
    "policy_jytguizhoulist1_callback",
    "policy_jytguizhouarticle_callback",
    "policy_jytguizhouarticle_etl_callback",
    "policy_mztguizhoulist_callback",
    "policy_mztguizhoulist1_callback",
    "policy_mztguizhouarticle_callback",
    "policy_mztguizhouarticle_etl_callback",
    "policy_cztguizhoulist_callback",
    "policy_cztguizhouarticle_callback",
    "policy_cztguizhouarticle_etl_callback",
    "policy_rstguizhoulist_callback",
    "policy_rstguizhoulist1_callback",
    "policy_rstguizhouarticle_callback",
    "policy_rstguizhouarticle_etl_callback",
    "policy_nynctguizhoulist_callback",
    "policy_nynctguizhouarticle_callback",
    "policy_nynctguizhouarticle_etl_callback",
    "policy_zfcxjstguizhoulist_callback",
    "policy_zfcxjstguizhoulist1_callback",
    "policy_zfcxjstguizhouarticle_callback",
    "policy_zfcxjstguizhouarticle_etl_callback",
    "policy_wjwguizhoulist_callback",
    "policy_wjwguizhoulist1_callback",
    "policy_wjwguizhouarticle_callback",
    "policy_wjwguizhouarticle_etl_callback",
    "policy_guiyanglist_callback",
    "policy_guiyanglist1_callback",
    "policy_guiyanglist2_callback",
    "policy_guiyangarticle_callback",
    "policy_guiyangarticle_etl_callback",
    "policy_zunyilist_callback",
    "policy_zunyilist1_callback",
    "policy_zunyiarticle_callback",
    "policy_zunyiarticle_etl_callback",
    "policy_gzlpslist_callback",
    "policy_gzlpsarticle_callback",
    "policy_gzlpsarticle_etl_callback",
    "policy_anshunlist_callback",
    "policy_anshunarticle_callback",
    "policy_anshunarticle_etl_callback",
    "policy_bijielist_callback",
    "policy_bijiearticle_callback",
    "policy_bijiearticle_etl_callback",
    "policy_trslist_callback",
    "policy_trsarticle_callback",
    "policy_trsarticle_etl_callback",
    "policy_qdnlist_callback",
    "policy_qdnarticle_callback",
    "policy_qdnarticle_etl_callback",
    "policy_qiannanlist_callback",
    "policy_qiannanarticle_callback",
    "policy_qiannanarticle_etl_callback",
    "policy_qxnlist_callback",
    "policy_qxnarticle_callback",
    "policy_qxnarticle_etl_callback",
    "policy_drcxizanglist_callback",
    "policy_drcxizangarticle_callback",
    "policy_drcxizangarticle_etl_callback",
    "policy_jxtxizanglist_callback",
    "policy_jxtxizangarticle_callback",
    "policy_jxtxizangarticle_etl_callback",
    "policy_stixizanglist_callback",
    "policy_stixizangarticle_callback",
    "policy_stixizangarticle_etl_callback",
    "policy_eduxizanglist_callback",
    "policy_eduxizangarticle_callback",
    "policy_eduxizangarticle_etl_callback",
    "policy_mztxizanglist_callback",
    "policy_mztxizangarticle_callback",
    "policy_mztxizangarticle_etl_callback",
    "policy_hrssxizanglist_callback",
    "policy_hrssxizangarticle_callback",
    "policy_hrssxizangarticle_etl_callback",
    "policy_nynctxizanglist_callback",
    "policy_nynctxizangarticle_callback",
    "policy_nynctxizangarticle_etl_callback",
    "policy_zjtxizanglist_callback",
    "policy_zjtxizangarticle_callback",
    "policy_zjtxizangarticle_etl_callback",
    "policy_wjwxizanglist_callback",
    "policy_wjwxizangarticle_callback",
    "policy_wjwxizangarticle_etl_callback",
    "policy_lasalist_callback",
    "policy_lasalist1_callback",
    "policy_lasaarticle_callback",
    "policy_lasaarticle_etl_callback",
    "policy_rikazelist_callback",
    "policy_rikazearticle_callback",
    "policy_rikazearticle_etl_callback",
    "policy_changdulist_callback",
    "policy_changduarticle_callback",
    "policy_changduarticle_etl_callback",
    "policy_linzhilist_callback",
    "policy_linzhiarticle_callback",
    "policy_linzhiarticle_etl_callback",
    "policy_shannanlist_callback",
    "policy_shannanarticle_callback",
    "policy_shannanarticle_etl_callback",
    "policy_naqulist_callback",
    "policy_naquarticle_callback",
    "policy_naquarticle_etl_callback",
    "policy_allist_callback",
    "policy_alarticle_callback",
    "policy_alarticle_etl_callback",
    "policy_sndrcshaanxilist_callback",
    "policy_sndrcshaanxiarticle_callback",
    "policy_sndrcshaanxiarticle_etl_callback",
    "policy_gxtshaanxilist_callback",
    "policy_gxtshaanxiarticle_callback",
    "policy_gxtshaanxiarticle_etl_callback",
    "policy_kjtshaanxilist_callback",
    "policy_kjtshaanxiarticle_callback",
    "policy_kjtshaanxiarticle_etl_callback",
    "policy_jytshaanxilist_callback",
    "policy_jytshaanxiarticle_callback",
    "policy_jytshaanxiarticle_etl_callback",
    "policy_mztshaanxilist_callback",
    "policy_mztshaanxiarticle_callback",
    "policy_mztshaanxiarticle_etl_callback",
    "policy_rstshaanxilist_callback",
    "policy_rstshaanxiarticle_callback",
    "policy_rstshaanxiarticle_etl_callback",
    "policy_nynctshaanxilist_callback",
    "policy_nynctshaanxiarticle_callback",
    "policy_nynctshaanxiarticle_etl_callback",
    "policy_jsshaanxilist_callback",
    "policy_jsshaanxiarticle_callback",
    "policy_jsshaanxiarticle_etl_callback",
    "policy_sxwjwshaanxilist_callback",
    "policy_sxwjwshaanxiarticle_callback",
    "policy_sxwjwshaanxiarticle_etl_callback",
    "policy_xalist_callback",
    "policy_xaarticle_callback",
    "policy_xaarticle_etl_callback",
    "policy_baojilist_callback",
    "policy_baojiarticle_callback",
    "policy_baojiarticle_etl_callback",
    "policy_xianyanglist_callback",
    "policy_xianyangarticle_callback",
    "policy_xianyangarticle_etl_callback",
    "policy_tongchuanlist_callback",
    "policy_tongchuanarticle_callback",
    "policy_tongchuanarticle_etl_callback",
    "policy_weinanlist_callback",
    "policy_weinanarticle_callback",
    "policy_weinanarticle_etl_callback",
    "policy_yananlist_callback",
    "policy_yananarticle_callback",
    "policy_yananarticle_etl_callback",
    "policy_yllist_callback",
    "policy_ylarticle_callback",
    "policy_ylarticle_etl_callback",
    "policy_hanzhonglist_callback",
    "policy_hanzhongarticle_callback",
    "policy_hanzhongarticle_etl_callback",
    "policy_ankanglist_callback",
    "policy_ankangarticle_callback",
    "policy_ankangarticle_etl_callback",
    "policy_shangluolist_callback",
    "policy_shangluoarticle_callback",
    "policy_shangluoarticle_etl_callback",
    "policy_fgwqinghailist_callback",
    "policy_fgwqinghaiarticle_callback",
    "policy_fgwqinghaiarticle_etl_callback",
    "policy_gxgzqinghailist_callback",
    "policy_gxgzqinghaiarticle_callback",
    "policy_gxgzqinghaiarticle_etl_callback",
    "policy_kjtqinghailist_callback",
    "policy_kjtqinghaiarticle_callback",
    "policy_kjtqinghaiarticle_etl_callback",
    "policy_jytqinghailist_callback",
    "policy_jytqinghaiarticle_callback",
    "policy_jytqinghaiarticle_etl_callback",
    "policy_mztqinghailist_callback",
    "policy_mztqinghaiarticle_callback",
    "policy_mztqinghaiarticle_etl_callback",
    "policy_cztqinghailist_callback",
    "policy_cztqinghailist1_callback",
    "policy_cztqinghaiarticle_callback",
    "policy_cztqinghaiarticle_etl_callback",
    "policy_rstqinghailist_callback",
    "policy_rstqinghaiarticle_callback",
    "policy_rstqinghaiarticle_etl_callback",
    "policy_nynctqinghailist_callback",
    "policy_nynctqinghaiarticle_callback",
    "policy_nynctqinghaiarticle_etl_callback",
    "policy_zjtqinghailist_callback",
    "policy_zjtqinghaiarticle_callback",
    "policy_zjtqinghaiarticle_etl_callback",
    "policy_wsjkwqinghailist_callback",
    "policy_wsjkwqinghaiarticle_callback",
    "policy_wsjkwqinghaiarticle_etl_callback",
    "policy_xininglist_callback",
    "policy_xiningarticle_callback",
    "policy_xiningarticle_etl_callback",
    "policy_haidonglist_callback",
    "policy_haidongarticle_callback",
    "policy_haidongarticle_etl_callback",
    "policy_haibeilist_callback",
    "policy_haibeiarticle_callback",
    "policy_haibeiarticle_etl_callback",
    "policy_huangnanlist_callback",
    "policy_huangnanarticle_callback",
    "policy_huangnanarticle_etl_callback",
    "policy_hainanzhoulist_callback",
    "policy_hainanzhouarticle_callback",
    "policy_hainanzhouarticle_etl_callback",
    "policy_guoluolist_callback",
    "policy_guoluoarticle_callback",
    "policy_guoluoarticle_etl_callback",
    "policy_yushuzhoulist_callback",
    "policy_yushuzhouarticle_callback",
    "policy_yushuzhouarticle_etl_callback",
    "policy_haixilist_callback",
    "policy_haixiarticle_callback",
    "policy_haixiarticle_etl_callback",
    "policy_jyglist_callback",
    "policy_jygarticle_callback",
    "policy_jygarticle_etl_callback",
    "policy_jcslist_callback",
    "policy_jcslist1_callback",
    "policy_jcsarticle_callback",
    "policy_jcsarticle_etl_callback",
    "policy_baiyinlist_callback",
    "policy_baiyinarticle_callback",
    "policy_baiyinarticle_etl_callback",
    "policy_tianshuilist_callback",
    "policy_tianshuilist1_callback",
    "policy_tianshuiarticle_callback",
    "policy_tianshuiarticle_etl_callback",
    "policy_gswuweilist_callback",
    "policy_gswuweiarticle_callback",
    "policy_gswuweiarticle_etl_callback",
    "policy_zhangyelist_callback",
    "policy_zhangyearticle_callback",
    "policy_zhangyearticle_etl_callback",
    "policy_pinglianglist_callback",
    "policy_pingliangarticle_callback",
    "policy_pingliangarticle_etl_callback",
    "policy_jiuquanlist_callback",
    "policy_jiuquanarticle_callback",
    "policy_jiuquanarticle_etl_callback",
    "policy_zgqingyanglist_callback",
    "policy_zgqingyangarticle_callback",
    "policy_zgqingyangarticle_etl_callback",
    "policy_longnanlist_callback",
    "policy_longnanarticle_callback",
    "policy_longnanarticle_etl_callback",
    "policy_linxialist_callback",
    "policy_linxiaarticle_callback",
    "policy_linxiaarticle_etl_callback",
    "policy_gnzrmzflist_callback",
    "policy_gnzrmzfarticle_callback",
    "policy_gnzrmzfarticle_etl_callback",
    "policy_dingxilist_callback",
    "policy_dingxiarticle_callback",
    "policy_dingxiarticle_etl_callback",

    # ETL-only entries (Gansu + one Shaanxi): these sites reuse shared list
    # and article callbacks, so only the ETL stage is exported here.
    "policy_fzgggansuarticle_etl_callback",
    "policy_gxtgansuarticle_etl_callback",
    "policy_kjtgansuarticle_etl_callback",
    "policy_jytgansuarticle_etl_callback",
    "policy_mztgansuarticle_etl_callback",
    "policy_cztgansuarticle_etl_callback",
    "policy_rstgansuarticle_etl_callback",
    "policy_nyncgansuarticle_etl_callback",
    "policy_zjtgansuarticle_etl_callback",
    "policy_wsjkgansuarticle_etl_callback",
    "policy_lanzhouarticle_etl_callback",
    "policy_cztshaanxiarticle_etl_callback",
]


def clean_pubdate(value):
    """Normalize a date string to an 8-digit ``YYYYMMDD`` string.

    Strips every non-digit character, truncates to the first 8 digits and
    right-pads with zeros. An out-of-range month (>12) zeroes month+day;
    an out-of-range day (>31) zeroes the day.

    Args:
        value: raw date text (may be None or empty).

    Returns:
        8-digit date string, or '' for falsy input.
    """
    if not value:
        return ''
    # Raw string for the regex: '\D' in a plain literal is an invalid escape.
    value = re.sub(r'\D', '', value)[:8].ljust(8, '0')
    if int(value[4:6]) > 12:
        value = value[:4] + '0000'
    if int(value[6:]) > 31:
        value = value[:6] + '00'
    return value


def cleaned(value):
    """Strip whitespace from a string, or join-and-strip a list of strings.

    Args:
        value: a string, a list of strings, or a falsy value.

    Returns:
        The stripped string ('' for falsy input); list items are stripped
        individually and joined with single spaces.
    """
    if not value:
        return ""
    # isinstance over type(...) is list: idiomatic and subclass-friendly.
    if isinstance(value, list):
        return ' '.join(item.strip() for item in value).strip()
    return value.strip()


# Substring fragments marking a link as junk: mail links, scripts, anchors,
# search engines, social media, and URLs polluted with full-width punctuation
# from surrounding scraped text. ('C:' is deliberately case-sensitive.)
_JUNK_FRAGMENTS = (
    'mailt', 'data:image/', 'javascript:', '#', 'weixin.qq',
    '.baidu', '。', '@163', '.cn/）', '8080）', 'cn）',
    'cn，', 'com，', 'cn,', 'haosou.', 'www.so.', 'file://',
    'C:', 'baike.soso', 'weibo.com', 'baike.sogou', 'html）',
    'shtml）', 'phtml）', 'wx.qq.', 'bing.com',
)

# Whole-URL suffixes that never point at a downloadable attachment.
_JUNK_URL_SUFFIXES = (
    '/', '.net', '.asp', '.shtml', '/share', '.exe', '.xml', 'pdf}', 'jpg}',
)

# Suffixes of the (lowercased) last path segment that indicate an HTML page
# or bare domain rather than a file. NOTE: 'xhtml' intentionally has no dot,
# matching the original behavior.
_JUNK_NAME_SUFFIXES = (
    '.htm', '.shtml', '.jhtml', '.org', 'xhtml', '.phtml',
    '.cn', '.com', '.html', '.mht', '.html%20',
)


def judge_url(url):
    """Return True if *url* should be skipped when collecting attachments.

    Filters over-long URLs, non-file links (HTML pages, anchors, scripts)
    and URLs garbled by surrounding scraped text.
    """
    if len(url) > 500:
        return True
    # A real URL must contain a path separator besides the scheme's '//'.
    if '/' not in url.replace('//', ''):
        return True
    if any(fragment in url for fragment in _JUNK_FRAGMENTS):
        return True
    if url.endswith(_JUNK_URL_SUFFIXES):
        return True
    ends = url.split('/')[-1].lower()
    if not ends:
        return True
    if ends.endswith(_JUNK_NAME_SUFFIXES):
        return True
    # Short *.jsp-style dynamic page names are not attachments either.
    if '.jsp' in ends and len(ends.split('.', 1)[1]) < 7:
        return True

    return False


def get_file_info(data, res, xpath):
    """Collect attachment links (<a href>) and embedded resources (@src)
    from the article body located by *xpath*.

    Args:
        data: article record; 'provider_url' is the base for resolving
              relative links, 'pub_year' and 'keyid' are copied onto
              each file entry.
        res: parsel Selector over the article HTML.
        xpath: XPath expression selecting the fulltext container.

    Returns:
        list of {'url', 'name', 'pub_year', 'keyid'} dicts, de-duplicated
        by absolute URL and filtered through judge_url().
    """
    base_url = data['provider_url']
    pub_year = data['pub_year']
    keyid = data['keyid']
    file_info = []
    seen = set()  # absolute URLs already collected (O(1) duplicate check)

    for tag in res.xpath(f'{xpath}//a'):
        href = tag.xpath('@href').extract_first()
        if not href or not href.strip():
            continue
        try:
            file_url = parse.urljoin(base_url, href.strip())
        except Exception:
            # Malformed href: skip it rather than abort the whole article.
            continue
        if judge_url(file_url) or file_url in seen:
            continue
        seen.add(file_url)
        name = ''.join(tag.xpath('.//text()').extract()).strip()
        file_info.append({'url': file_url, 'name': name,
                          'pub_year': pub_year, 'keyid': keyid})

    for src in res.xpath(f'{xpath}//*/@src').extract():
        src = src.strip()
        if not src:
            continue
        img_url = parse.urljoin(base_url, src)
        if judge_url(img_url) or img_url in seen:
            continue
        seen.add(img_url)
        # Images rarely have link text; reuse the raw src as the name.
        file_info.append({'url': img_url, 'name': src,
                          'pub_year': pub_year, 'keyid': keyid})
    return file_info


def deal_sql_dict(sql_dict):
    """Strip bookkeeping columns from a task-row dict before re-inserting it.

    Removes auto-generated and state-tracking fields so the remaining keys
    can seed a fresh INSERT. Mutates *sql_dict* in place and returns it.
    """
    for key in ('id', 'update_time', 'create_time', 'null_dicts',
                'err_msg', 'other_dicts', 'state', 'failcount'):
        # Default of None tolerates rows that already lack a column.
        sql_dict.pop(key, None)
    return sql_dict


def init_data(rawid, lngid, sub_db_id, down_date_str, product, zt_provider):
    """Build the base metadata record shared by every policy article.

    latest_date is the date prefix of down_date_str ('YYYYMMDD'); the full
    'YYYYMMDD_HHMMSS' string is kept as the batch identifier.
    """
    return {
        'rawid': rawid,
        'rawid_mysql': rawid,
        'lngid': lngid,
        'keyid': lngid,
        'product': product,
        'sub_db': 'POLICY',
        'sub_db_id': sub_db_id,
        'provider': 'CNGOV',
        'zt_provider': zt_provider,
        'source_type': '16',
        'latest_date': down_date_str[:8],
        'batch': down_date_str,
        'vision': '1',
        'is_deprecated': '0',
        'country': 'CN',
        'language': 'ZH',
    }


def init_full_text_data(lngid, sub_db_id, down_date_str, fulltext, pub_year):
    """Build the fulltext record stored alongside the article metadata.

    The fulltext is HTML stored inline (fulltext_txt); address/size fields
    are left blank and the virtual filename is '<lngid>.html'.
    """
    return {
        'lngid': lngid,
        'keyid': lngid,
        'sub_db_id': sub_db_id,
        'source_type': '16',
        'latest_date': down_date_str[:8],
        'batch': down_date_str,
        'is_deprecated': '0',
        'filename': f"{lngid}.html",
        'fulltext_type': "html",
        'fulltext_addr': '',
        'fulltext_size': '',
        'fulltext_txt': fulltext,
        'page_cnt': "1",
        'pub_year': pub_year,
    }


#   贵州省发展和改革委员会 (Guizhou Provincial Development and Reform Commission)
def policy_fgwguizhoulist_callback(callmodel: CallBackModel[PolicyListModel]) -> DealModel:
    """List-page callback for fgw.guizhou.gov.cn (Development and Reform Commission).

    On the first page of a listing it fans out crawl tasks for every
    remaining page; it then extracts each article's URL, title and publish
    date from the current page and queues it for the article stage.
    """
    result = DealModel()
    para_dicts = callmodel.para_dicts
    task_info = callmodel.redis_all.parse_dict["1_1"].task_info
    # Base fields copied onto every queued row; "task_tag_next" becomes the
    # task_tag the article-stage rows will run under.
    info_dicts = {"task_name": callmodel.sql_model.task_name,
                  "task_tag": callmodel.sql_model.task_tag,
                  "task_tag_next": task_info.task_tag_next}
    if "1_1" in para_dicts["data"]:
        # Total page count is embedded in a createPageHTML(<total>, ...) JS
        # call; default to a single page when it is absent.
        max_count = re.findall('createPageHTML\((\d+)', para_dicts["data"]["1_1"]['html'])
        max_count = int(max_count[0]) if max_count else 1
        total_page = max_count
        page_index = int(callmodel.sql_model.page_index)
        if page_index == 0:
            # Only the first page schedules pages 1..total-1, each a copy of
            # this row with a page-suffixed list_json.
            sql_dict = callmodel.sql_model.dict()
            di_model_bef = DealInsertModel()
            di_model_bef.insert_pre = CoreSqlValue.insert_ig_it
            sql_dict = deal_sql_dict(sql_dict)
            list_json = json.loads(callmodel.sql_model.list_json)
            for page in range(page_index + 1, total_page):
                sql_dict["page"] = total_page
                sql_dict["page_index"] = page
                dic = {"page_info": f"{list_json['page_info']}_{page}"}
                sql_dict["list_json"] = json.dumps(dic, ensure_ascii=False)
                # sql_dict["list_json"] = callmodel.sql_model.list_json
                di_model_bef.lists.append(sql_dict.copy())
            result.befor_dicts.insert.append(di_model_bef)
        di_model_next = DealInsertModel()
        di_model_next.insert_pre = CoreSqlValue.insert_ig_it
        res = Selector(text=para_dicts["data"]["1_1"]['html'])
        # The policy-interpretation channel (jdhy/zcjd) uses different list
        # markup than the other channels on this site.
        if 'jdhy/zcjd' in callmodel.sql_model.list_rawid:
            li_list = res.xpath('//div[@class="zcjd_list"]/ul/li')
            for li in li_list:
                temp = info_dicts.copy()
                # Promote the next-stage tag to this row's task_tag.
                temp["task_tag"] = temp["task_tag_next"]
                del temp["task_tag_next"]
                article_json = dict()
                href = li.xpath('h2/a/@href').extract_first()
                base_url = f'http://fgw.guizhou.gov.cn/{callmodel.sql_model.list_rawid}.html'
                url = parse.urljoin(base_url, href)
                if 'htm' not in url:
                    continue
                # rawid = article filename without its extension.
                rawid = re.findall('(.*?)\.', url.split('/')[-1])[0]
                temp["rawid"] = rawid
                temp["sub_db_id"] = '99585'
                article_json["url"] = url
                article_json["title"] = li.xpath('h2/a/text()').extract_first().strip()
                article_json["pub_date"] = li.xpath('h2/span/text()').extract_first().strip()
                temp["article_json"] = json.dumps(article_json, ensure_ascii=False)
                di_model_next.lists.append(temp)
        else:
            li_list = res.xpath('//div[@class="zfxxgk_zdgkc"]/ul/li|//div[@class="new_list auto"]/ul/li')
            for li in li_list:
                temp = info_dicts.copy()
                # Promote the next-stage tag to this row's task_tag.
                temp["task_tag"] = temp["task_tag_next"]
                del temp["task_tag_next"]
                article_json = dict()
                href = li.xpath('a/@href').extract_first()
                base_url = f'http://fgw.guizhou.gov.cn/{callmodel.sql_model.list_rawid}.html'
                url = parse.urljoin(base_url, href)
                if 'htm' not in url:
                    continue
                # rawid = article filename without its extension.
                rawid = re.findall('(.*?)\.', url.split('/')[-1])[0]
                temp["rawid"] = rawid
                temp["sub_db_id"] = '99585'
                article_json["url"] = url
                article_json["title"] = li.xpath('a/text()').extract_first().strip()
                article_json["pub_date"] = li.xpath('span/text()|b/text()').extract_first().strip()
                temp["article_json"] = json.dumps(article_json, ensure_ascii=False)
                di_model_next.lists.append(temp)
        result.next_dicts.insert.append(di_model_next)

    return result


def policy_fgwguizhouarticle_callback(callmodel: CallBackModel[PolicyArticleModel]) -> DealModel:
    """Article fetch stage needs no extra processing; return an empty DealModel."""
    return DealModel()


def policy_fgwguizhouarticle_etl_callback(callmodel) -> EtlDealModel:
    """ETL callback for fgw.guizhou.gov.cn articles.

    Extracts metadata (title, dates, document/index numbers, issuing organ,
    subject, legal status) and the fulltext from the fetched article HTML,
    builds the policy_latest / policy_fulltext_latest rows, and stores any
    attachment info back onto the task row.

    Raises:
        Exception: when the fulltext container is missing (triggers retry).
    """
    result = EtlDealModel()
    save_data = list()

    html = callmodel.para_dicts['data']['1_1']['html']
    article_json = json.loads(callmodel.sql_model.article_json)
    title = article_json['title']
    provider_url = article_json['url']
    pub_date = clean_pubdate(article_json['pub_date'])
    pub_year = pub_date[:4]
    res = Selector(text=html)

    # Prefer the title rendered on the page; fall back to the list-page title.
    title = ''.join(res.xpath('//div[@class="Article_bt"]//text()').extract()).strip()
    if not title:
        title = article_json['title'].strip()
    # Document number / index number live in a layui metadata table.
    pub_no = ''.join(res.xpath('//table[@class="layui-table"]//strong[contains(text(),"文　　号：")]/following::td[1]//text()').extract()).strip()
    index_no = ''.join(res.xpath('//table[@class="layui-table"]//strong[contains(text(),"索 引 号：")]/following::td[1]//text()').extract()).strip()
    # Remaining metadata is embedded in inline JS variables, with two
    # alternative spellings per field across page templates.
    subject = re.findall("var xxfl = '(.*?)'", html)
    if not subject:
        subject = re.findall("\? xxfl='(.*?)'", html)
    subject = subject[0].strip() if subject else ''
    written_date = re.findall('var cwrq="(.*?)"', html)
    written_date = written_date[0].strip() if written_date else ''
    # NOTE: double space in "var  isok" matches this site's JS verbatim.
    legal_status = re.findall("var  isok='(.*?)'", html)
    legal_status = '有效' if legal_status and legal_status[0].strip() == '是' else ''
    organ = re.findall('var str = "(.*?)"', html)
    if not organ:
        organ = re.findall('var str_1 = "(.*?)"', html)
    organ = organ[0].strip() if organ else ''
    # Qualify province-level organs ("省...") with the province name.
    if organ.startswith('省'):
        organ = '贵州' + organ

    fulltext_xpath = '//font[@id="Zoom"]'
    fulltext = res.xpath(fulltext_xpath).extract_first()
    if not fulltext:
        # No body found: fail the task so the framework can retry/flag it.
        raise Exception

    down_date_str = time.strftime('%Y%m%d_%H%M%S', time.localtime())
    sub_db_id = '99585'
    rawid = callmodel.sql_model.rawid
    lngid = BaseLngid().GetLngid(sub_db_id, rawid)
    product = "FGWGUIZHOU"
    zt_provider = "fgwguizhougovpolicy"
    data = init_data(rawid, lngid, sub_db_id, down_date_str, product, zt_provider)
    print(lngid)

    data['title'] = title
    data['provider_url'] = provider_url
    data['pub_date'] = pub_date
    data['pub_year'] = pub_year
    data['pub_no'] = pub_no
    data['organ'] = organ
    data['index_no'] = index_no
    data['written_date'] = clean_pubdate(written_date)
    # data['impl_date'] = clean_pubdate(impl_date)
    # data['invalid_date'] = clean_pubdate(invalid_date)
    data['subject'] = subject
    # data['subject_word'] = subject_word
    data['legal_status'] = legal_status

    save_data.append({'table': 'policy_latest', 'data': data})
    full_text_data = init_full_text_data(lngid, sub_db_id, down_date_str, fulltext, pub_year)
    save_data.append({'table': 'policy_fulltext_latest', 'data': full_text_data})
    result.save_data = save_data

    # Attachment/image links found in the body are written back to the task
    # row's other_dicts for a later download stage.
    file_info = get_file_info(data, res, f'({fulltext_xpath})')
    di_model_bef = DealUpdateModel()
    if file_info:
        di_model_bef.update.update({"other_dicts": json.dumps(file_info, ensure_ascii=False)})
    else:
        di_model_bef.update.update({"other_dicts": "{}"})
    di_model_bef.where.update({"rawid": callmodel.sql_model.rawid, "task_tag": callmodel.sql_model.task_tag,
                               "task_name": callmodel.sql_model.task_name})
    result.befor_dicts.update_list.append(di_model_bef)
    return result


#   贵州省工业和信息化厅 (Guizhou Provincial Department of Industry and Information Technology)
def policy_gxtguizhoulist_callback(callmodel: CallBackModel[PolicyListModel]) -> DealModel:
    """List-page callback for gxt.guizhou.gov.cn (Industry and Information Technology).

    On the first page of a listing it fans out crawl tasks for every
    remaining page; it then extracts each article's URL, title and publish
    date from the current page and queues it for the article stage.
    """
    result = DealModel()
    para_dicts = callmodel.para_dicts
    task_info = callmodel.redis_all.parse_dict["1_1"].task_info
    # Base fields copied onto every queued row; "task_tag_next" becomes the
    # task_tag the article-stage rows will run under.
    info_dicts = {"task_name": callmodel.sql_model.task_name,
                  "task_tag": callmodel.sql_model.task_tag,
                  "task_tag_next": task_info.task_tag_next}
    if "1_1" in para_dicts["data"]:
        # Total page count is embedded in a createPageHTML(<total>, ...) JS
        # call; default to a single page when it is absent.
        max_count = re.findall('createPageHTML\((\d+)', para_dicts["data"]["1_1"]['html'])
        max_count = int(max_count[0]) if max_count else 1
        total_page = max_count
        page_index = int(callmodel.sql_model.page_index)
        if page_index == 0:
            # Only the first page schedules pages 1..total-1, each a copy of
            # this row with a page-suffixed list_json.
            sql_dict = callmodel.sql_model.dict()
            di_model_bef = DealInsertModel()
            di_model_bef.insert_pre = CoreSqlValue.insert_ig_it
            sql_dict = deal_sql_dict(sql_dict)
            list_json = json.loads(callmodel.sql_model.list_json)
            for page in range(page_index + 1, total_page):
                sql_dict["page"] = total_page
                sql_dict["page_index"] = page
                dic = {"page_info": f"{list_json['page_info']}_{page}"}
                sql_dict["list_json"] = json.dumps(dic, ensure_ascii=False)
                # sql_dict["list_json"] = callmodel.sql_model.list_json
                di_model_bef.lists.append(sql_dict.copy())
            result.befor_dicts.insert.append(di_model_bef)
        di_model_next = DealInsertModel()
        di_model_next.insert_pre = CoreSqlValue.insert_ig_it
        res = Selector(text=para_dicts["data"]["1_1"]['html'])
        # News/notice and planning channels use <ul>-based lists; the other
        # channels render rows in a <tbody id="idData"> table.
        if 'gxdt/tzgg' in callmodel.sql_model.list_rawid or 'zfxxgk/fdzdgknr/ghxx' in callmodel.sql_model.list_rawid:
            li_list = res.xpath('//ul[@class="newsList"]/li|//div[@class="zfxxgk_zdgkc"]/ul/li')
            for li in li_list:
                temp = info_dicts.copy()
                # Promote the next-stage tag to this row's task_tag.
                temp["task_tag"] = temp["task_tag_next"]
                del temp["task_tag_next"]
                article_json = dict()
                href = li.xpath('a/@href').extract_first()
                # base_url = f'http://fgw.guizhou.gov.cn/{callmodel.sql_model.list_rawid}.html'
                base_url = f'https://gxt.guizhou.gov.cn'
                # url = parse.urljoin(base_url, href)
                # hrefs here are either absolute or root-relative, so plain
                # concatenation is used instead of urljoin.
                if 'http' in href:
                    url = href
                else:
                    url = base_url + href
                if 'htm' not in url:
                    continue
                # rawid = article filename without its extension.
                rawid = re.findall('(.*?)\.', url.split('/')[-1])[0]
                temp["rawid"] = rawid
                temp["sub_db_id"] = '99586'
                article_json["url"] = url
                article_json["title"] = li.xpath('a/text()').extract_first().strip()
                article_json["pub_date"] = li.xpath('span/text()|b/text()').extract_first().strip()
                temp["article_json"] = json.dumps(article_json, ensure_ascii=False)
                di_model_next.lists.append(temp)
        else:
            li_list = res.xpath('//tbody[@id="idData"]/tr')
            for li in li_list:
                temp = info_dicts.copy()
                # Promote the next-stage tag to this row's task_tag.
                temp["task_tag"] = temp["task_tag_next"]
                del temp["task_tag_next"]
                article_json = dict()
                href = li.xpath('td[2]/a/@href').extract_first()
                base_url = f'https://gxt.guizhou.gov.cn/{callmodel.sql_model.list_rawid}.html'
                url = parse.urljoin(base_url, href)
                if 'htm' not in url:
                    continue
                # rawid = article filename without its extension.
                rawid = re.findall('(.*?)\.', url.split('/')[-1])[0]
                temp["rawid"] = rawid
                temp["sub_db_id"] = '99586'
                article_json["url"] = url
                article_json["title"] = li.xpath('td[2]/a/text()').extract_first().strip()
                # Table rows carry no date; the ETL stage recovers it from
                # the article page's JS variables.
                article_json["pub_date"] = ''
                temp["article_json"] = json.dumps(article_json, ensure_ascii=False)
                di_model_next.lists.append(temp)
        result.next_dicts.insert.append(di_model_next)

    return result


def policy_gxtguizhouarticle_callback(callmodel: CallBackModel[PolicyArticleModel]) -> DealModel:
    """Article fetch stage needs no extra processing; return an empty DealModel."""
    return DealModel()


def policy_gxtguizhouarticle_etl_callback(callmodel) -> EtlDealModel:
    """ETL callback for gxt.guizhou.gov.cn articles.

    Extracts metadata (title, dates, document/index numbers, issuing organ,
    subject, legal status) and the fulltext from the fetched article HTML,
    builds the policy_latest / policy_fulltext_latest rows, and stores any
    attachment info back onto the task row.

    Raises:
        Exception: when no publish date or no fulltext container is found
            (triggers retry).
    """
    result = EtlDealModel()
    save_data = list()

    html = callmodel.para_dicts['data']['1_1']['html']
    article_json = json.loads(callmodel.sql_model.article_json)
    title = article_json['title']
    provider_url = article_json['url']
    pub_date = clean_pubdate(article_json['pub_date'])
    pub_year = pub_date[:4]
    res = Selector(text=html)

    # Prefer the title rendered on the page; fall back to the list-page title.
    title = ''.join(res.xpath('//div[@class="Article_bt"]//text()|//div[@class="zwxxgk_ndbgwz"]//h1//text()').extract()).strip()
    if not title:
        title = article_json['title'].strip()
    # Table-channel rows carry no date, so recover it from the article
    # page's JS variables ("gkrq" then "pubdata") and fail if still missing.
    if not pub_date:
        pub_date = re.findall('var gkrq="(.*?)"', html)
        pub_date = clean_pubdate(pub_date[0].strip()) if pub_date else ''
        pub_year = pub_date[:4]
    if not pub_date:
        pub_date = re.findall("pubdata='(.*?)'", html)
        pub_date = clean_pubdate(pub_date[0].strip()) if pub_date else ''
        pub_year = pub_date[:4]
    if not pub_date:
        raise Exception
    # Document number / index number live in a layui metadata table.
    pub_no = ''.join(res.xpath('//table[@class="layui-table"]//strong[contains(text(),"文　　号：")]/following::td[1]//text()').extract()).strip()
    index_no = ''.join(res.xpath('//table[@class="layui-table"]//strong[contains(text(),"索 引 号：")]/following::td[1]//text()').extract()).strip()
    # Remaining metadata is embedded in inline JS variables, with two
    # alternative spellings per field across page templates.
    subject = re.findall("var xxfl = '(.*?)'", html)
    if not subject:
        subject = re.findall("\? xxfl='(.*?)'", html)
    subject = subject[0].strip() if subject else ''
    written_date = re.findall('var cwrq="(.*?)"', html)
    written_date = written_date[0].strip() if written_date else ''
    legal_status = re.findall("var isok='(.*?)'", html)
    legal_status = '有效' if legal_status and legal_status[0].strip() == '是' else ''
    organ = re.findall('var str = "(.*?)"', html)
    if not organ:
        organ = re.findall('var str_1 = "(.*?)"', html)
    organ = organ[0].strip() if organ else ''
    # Qualify province-level organs ("省...") with the province name.
    if organ.startswith('省'):
        organ = '贵州' + organ

    fulltext_xpath = '//font[@id="Zoom"]|//div[@id="Zoom"]'
    fulltext = res.xpath(fulltext_xpath).extract_first()
    if not fulltext:
        # No body found: fail the task so the framework can retry/flag it.
        raise Exception

    down_date_str = time.strftime('%Y%m%d_%H%M%S', time.localtime())
    sub_db_id = '99586'
    rawid = callmodel.sql_model.rawid
    lngid = BaseLngid().GetLngid(sub_db_id, rawid)
    product = "GXTGUIZHOU"
    zt_provider = "gxtguizhougovpolicy"
    data = init_data(rawid, lngid, sub_db_id, down_date_str, product, zt_provider)
    print(lngid)

    data['title'] = title
    data['provider_url'] = provider_url
    data['pub_date'] = pub_date
    data['pub_year'] = pub_year
    data['pub_no'] = pub_no
    data['organ'] = organ
    data['index_no'] = index_no
    data['written_date'] = clean_pubdate(written_date)
    # data['impl_date'] = clean_pubdate(impl_date)
    # data['invalid_date'] = clean_pubdate(invalid_date)
    data['subject'] = subject
    # data['subject_word'] = subject_word
    data['legal_status'] = legal_status

    save_data.append({'table': 'policy_latest', 'data': data})
    full_text_data = init_full_text_data(lngid, sub_db_id, down_date_str, fulltext, pub_year)
    save_data.append({'table': 'policy_fulltext_latest', 'data': full_text_data})
    result.save_data = save_data

    # Attachment/image links found in the body are written back to the task
    # row's other_dicts for a later download stage.
    file_info = get_file_info(data, res, f'({fulltext_xpath})')
    di_model_bef = DealUpdateModel()
    if file_info:
        di_model_bef.update.update({"other_dicts": json.dumps(file_info, ensure_ascii=False)})
    else:
        di_model_bef.update.update({"other_dicts": "{}"})
    di_model_bef.where.update({"rawid": callmodel.sql_model.rawid, "task_tag": callmodel.sql_model.task_tag,
                               "task_name": callmodel.sql_model.task_name})
    result.befor_dicts.update_list.append(di_model_bef)
    return result


#   贵州省科学技术厅 (Guizhou Provincial Department of Science and Technology)
def policy_kjtguizhoulist_callback(callmodel: CallBackModel[PolicyListModel]) -> DealModel:
    result = DealModel()
    para_dicts = callmodel.para_dicts
    task_info = callmodel.redis_all.parse_dict["1_1"].task_info
    info_dicts = {"task_name": callmodel.sql_model.task_name,
                  "task_tag": callmodel.sql_model.task_tag,
                  "task_tag_next": task_info.task_tag_next}
    if "1_1" in para_dicts["data"]:
        max_count = re.findall('createPageHTML\((\d+)', para_dicts["data"]["1_1"]['html'])
        max_count = int(max_count[0]) if max_count else 1
        total_page = max_count
        page_index = int(callmodel.sql_model.page_index)
        if page_index == 0:
            sql_dict = callmodel.sql_model.dict()
            di_model_bef = DealInsertModel()
            di_model_bef.insert_pre = CoreSqlValue.insert_ig_it
            sql_dict = deal_sql_dict(sql_dict)
            list_json = json.loads(callmodel.sql_model.list_json)
            for page in range(page_index + 1, total_page):
                sql_dict["page"] = total_page
                sql_dict["page_index"] = page
                dic = {"page_info": f"{list_json['page_info']}_{page}"}
                sql_dict["list_json"] = json.dumps(dic, ensure_ascii=False)
                # sql_dict["list_json"] = callmodel.sql_model.list_json
                di_model_bef.lists.append(sql_dict.copy())
            result.befor_dicts.insert.append(di_model_bef)
        di_model_next = DealInsertModel()
        di_model_next.insert_pre = CoreSqlValue.insert_ig_it
        res = Selector(text=para_dicts["data"]["1_1"]['html'])
        if 'xwzx/tzgg_73876' in callmodel.sql_model.list_rawid:
            li_list = res.xpath('//div[@class="right-list-box"]/ul/li')
            for li in li_list:
                temp = info_dicts.copy()
                temp["task_tag"] = temp["task_tag_next"]
                del temp["task_tag_next"]
                article_json = dict()
                href = li.xpath('a/@href').extract_first()
                # base_url = f'http://fgw.guizhou.gov.cn/{callmodel.sql_model.list_rawid}.html'
                base_url = f'http://kjt.guizhou.gov.cn'
                # url = parse.urljoin(base_url, href)
                if 'http' in href:
                    url = href
                else:
                    url = base_url + href
                if 'htm' not in url:
                    continue
                rawid = re.findall('(.*?)\.', url.split('/')[-1])[0]
                temp["rawid"] = rawid
                temp["sub_db_id"] = '99587'
                article_json["url"] = url
                article_json["title"] = li.xpath('a/text()').extract_first().strip()
                article_json["pub_date"] = li.xpath('span/text()').extract_first().strip()
                temp["article_json"] = json.dumps(article_json, ensure_ascii=False)
                di_model_next.lists.append(temp)
        else:
            li_list = res.xpath('//tbody[@id="idData"]/tr')
            for li in li_list:
                temp = info_dicts.copy()
                temp["task_tag"] = temp["task_tag_next"]
                del temp["task_tag_next"]
                article_json = dict()
                href = li.xpath('td[3]/a/@href').extract_first()
                base_url = f'http://kjt.guizhou.gov.cn/{callmodel.sql_model.list_rawid}.html'
                url = parse.urljoin(base_url, href)
                if 'htm' not in url:
                    continue
                rawid = re.findall('(.*?)\.', url.split('/')[-1])[0]
                temp["rawid"] = rawid
                temp["sub_db_id"] = '99587'
                article_json["url"] = url
                article_json["title"] = li.xpath('td[3]/a/text()').extract_first().strip()
                article_json["pub_date"] = ''
                temp["article_json"] = json.dumps(article_json, ensure_ascii=False)
                di_model_next.lists.append(temp)
        result.next_dicts.insert.append(di_model_next)

    return result


def policy_kjtguizhouarticle_callback(callmodel: CallBackModel[PolicyArticleModel]) -> DealModel:
    """Article-stage callback for kjt.guizhou.gov.cn.

    No per-article processing happens at this stage; parsing is deferred to
    the ETL callback, so an empty DealModel is returned.
    """
    return DealModel()


def policy_kjtguizhouarticle_etl_callback(callmodel) -> EtlDealModel:
    """ETL callback for kjt.guizhou.gov.cn article pages.

    Extracts policy metadata (title, dates, document number, organ, ...) and
    the full text from the article HTML, builds `policy_latest` /
    `policy_fulltext_latest` rows, and writes attachment info back onto the
    source row.

    Raises:
        Exception: when no publish date or no full text can be located.
    """
    result = EtlDealModel()
    save_data = list()

    html = callmodel.para_dicts['data']['1_1']['html']
    article_json = json.loads(callmodel.sql_model.article_json)
    title = article_json['title']
    provider_url = article_json['url']
    pub_date = clean_pubdate(article_json['pub_date'])
    pub_year = pub_date[:4]
    res = Selector(text=html)

    # Prefer the on-page title; fall back to the one captured at list time.
    title = ''.join(res.xpath('//div[@class="Article_bt"]//text()').extract()).strip()
    if not title:
        title = article_json['title'].strip()
    # The publish date may live in one of several inline JS variables;
    # try each pattern in turn.
    if not pub_date:
        pub_date = re.findall(r'var gkrq="(.*?)"', html)
        pub_date = clean_pubdate(pub_date[0].strip()) if pub_date else ''
        pub_year = pub_date[:4]
    if not pub_date:
        pub_date = re.findall(r'gkrq="(.*?)"', html)
        pub_date = clean_pubdate(pub_date[-1].strip()) if pub_date else ''
        pub_year = pub_date[:4]
    if not pub_date:
        pub_date = re.findall(r"pubdata='(.*?)'", html)
        pub_date = clean_pubdate(pub_date[0].strip()) if pub_date else ''
        pub_year = pub_date[:4]
    if not pub_date:
        raise Exception(f'pub_date not found: {provider_url}')
    pub_no = ''.join(res.xpath('//div[@class="Article_xx"]//strong[contains(text(),"文　　号：")]/following::td[1]//text()').extract()).strip()
    index_no = ''.join(res.xpath('//div[@class="Article_xx"]//strong[contains(text(),"索 引 号：")]/following::td[1]//text()').extract()).strip()
    subject = ''.join(res.xpath('//div[@class="Article_xx"]//strong[contains(text(),"信息分类：")]/following::td[1]//text()').extract()).strip()
    written_date = re.findall(r'var cwrq="(.*?)"', html)
    written_date = written_date[0].strip() if written_date else ''
    # "isok" holds 是/否 (yes/no) for whether the document is still in force.
    legal_status = re.findall(r"var  isok='(.*?)'", html)
    legal_status = '有效' if legal_status and legal_status[0].strip() == '是' else ''
    organ = re.findall(r'var str = "(.*?)"', html)
    if not organ:
        organ = re.findall(r'var str_1 = "(.*?)"', html)
    organ = organ[0].strip() if organ else ''
    if organ.startswith('省'):
        # Organ names starting with "省" (Provincial ...) get the province
        # prefix so they are unambiguous across sources.
        organ = '贵州' + organ

    fulltext_xpath = '//font[@id="Zoom"]'
    fulltext = res.xpath(fulltext_xpath).extract_first()
    if not fulltext:
        raise Exception(f'fulltext not found: {provider_url}')

    down_date_str = time.strftime('%Y%m%d_%H%M%S', time.localtime())
    sub_db_id = '99587'
    rawid = callmodel.sql_model.rawid
    lngid = BaseLngid().GetLngid(sub_db_id, rawid)
    product = "KJTGUIZHOU"
    zt_provider = "kjtguizhougovpolicy"
    data = init_data(rawid, lngid, sub_db_id, down_date_str, product, zt_provider)
    print(lngid)

    data['title'] = title
    data['provider_url'] = provider_url
    data['pub_date'] = pub_date
    data['pub_year'] = pub_year
    data['pub_no'] = pub_no
    data['organ'] = organ
    data['index_no'] = index_no
    data['written_date'] = clean_pubdate(written_date)
    data['subject'] = subject
    data['legal_status'] = legal_status

    save_data.append({'table': 'policy_latest', 'data': data})
    full_text_data = init_full_text_data(lngid, sub_db_id, down_date_str, fulltext, pub_year)
    save_data.append({'table': 'policy_fulltext_latest', 'data': full_text_data})
    result.save_data = save_data

    # Record attachment info (if any) back onto the originating task row.
    file_info = get_file_info(data, res, f'({fulltext_xpath})')
    di_model_bef = DealUpdateModel()
    if file_info:
        di_model_bef.update.update({"other_dicts": json.dumps(file_info, ensure_ascii=False)})
    else:
        di_model_bef.update.update({"other_dicts": "{}"})
    di_model_bef.where.update({"rawid": callmodel.sql_model.rawid, "task_tag": callmodel.sql_model.task_tag,
                               "task_name": callmodel.sql_model.task_name})
    result.befor_dicts.update_list.append(di_model_bef)
    return result


#   贵州省教育厅 (Guizhou Provincial Department of Education)
def policy_jytguizhoulist_callback(callmodel: CallBackModel[PolicyListModel]) -> DealModel:
    """List-page callback for static list pages on jyt.guizhou.gov.cn.

    On the first page (page_index == 0) schedules tasks for the remaining
    pagination pages, then emits one article-stage task per list entry.
    """
    result = DealModel()
    para_dicts = callmodel.para_dicts
    task_info = callmodel.redis_all.parse_dict["1_1"].task_info
    info_dicts = {"task_name": callmodel.sql_model.task_name,
                  "task_tag": callmodel.sql_model.task_tag,
                  "task_tag_next": task_info.task_tag_next}
    if "1_1" in para_dicts["data"]:
        # Total page count is embedded in a createPageHTML(...) JS call.
        max_count = re.findall(r'createPageHTML\((\d+)', para_dicts["data"]["1_1"]['html'])
        total_page = int(max_count[0]) if max_count else 1
        page_index = int(callmodel.sql_model.page_index)
        if page_index == 0:
            sql_dict = callmodel.sql_model.dict()
            di_model_bef = DealInsertModel()
            di_model_bef.insert_pre = CoreSqlValue.insert_ig_it
            sql_dict = deal_sql_dict(sql_dict)
            list_json = json.loads(callmodel.sql_model.list_json)
            for page in range(page_index + 1, total_page):
                sql_dict["page"] = total_page
                sql_dict["page_index"] = page
                dic = {"page_info": f"{list_json['page_info']}_{page}"}
                sql_dict["list_json"] = json.dumps(dic, ensure_ascii=False)
                di_model_bef.lists.append(sql_dict.copy())
            result.befor_dicts.insert.append(di_model_bef)
        di_model_next = DealInsertModel()
        di_model_next.insert_pre = CoreSqlValue.insert_ig_it
        res = Selector(text=para_dicts["data"]["1_1"]['html'])
        li_list = res.xpath('//div[@class="NewsList"]/ul/li|//div[@class="zfxxgk_zdgkc"]/ul/li')
        for li in li_list:
            temp = info_dicts.copy()
            temp["task_tag"] = temp["task_tag_next"]
            del temp["task_tag_next"]
            article_json = dict()
            href = li.xpath('a/@href').extract_first()
            if not href:
                # Skip items without a link (consistent with the czt
                # list callback).
                continue
            base_url = 'https://jyt.guizhou.gov.cn'
            if 'http' in href:
                url = href
            else:
                url = base_url + href
            if 'htm' not in url:
                continue
            # rawid is the article file name without its extension.
            rawid = re.findall(r'(.*?)\.', url.split('/')[-1])[0]
            temp["rawid"] = rawid
            temp["sub_db_id"] = '99588'
            article_json["url"] = url
            article_json["title"] = li.xpath('a/text()').extract_first().strip()
            article_json["pub_date"] = li.xpath('span/text()|b/text()').extract_first().strip()
            temp["article_json"] = json.dumps(article_json, ensure_ascii=False)
            di_model_next.lists.append(temp)
        result.next_dicts.insert.append(di_model_next)

    return result


def policy_jytguizhoulist1_callback(callmodel: CallBackModel[PolicyListModel]) -> DealModel:
    """List callback for the JSON (search-API) list pages of jyt.guizhou.gov.cn.

    On page 1 schedules tasks for every page reported by the API pager; each
    response entry then becomes one article-stage task.
    """
    result = DealModel()
    para_dicts = callmodel.para_dicts
    task_info = callmodel.redis_all.parse_dict["1_1"].task_info
    info_dicts = {"task_name": callmodel.sql_model.task_name,
                  "task_tag": callmodel.sql_model.task_tag,
                  "task_tag_next": task_info.task_tag_next}
    if "1_1" in para_dicts["data"]:
        html_json = json.loads(para_dicts["data"]["1_1"]['html'])
        total_page = html_json['data']['pager']['pageCount']
        page_index = int(callmodel.sql_model.page_index)
        if page_index == 1:
            sql_dict = callmodel.sql_model.dict()
            di_model_bef = DealInsertModel()
            di_model_bef.insert_pre = CoreSqlValue.insert_ig_it
            sql_dict = deal_sql_dict(sql_dict)
            for page in range(1, total_page + 1):
                sql_dict["page"] = total_page
                sql_dict["page_index"] = page
                # The request body is identical for every page; only
                # page_index varies.
                sql_dict["list_json"] = callmodel.sql_model.list_json
                di_model_bef.lists.append(sql_dict.copy())
            result.befor_dicts.insert.append(di_model_bef)
        di_model_next = DealInsertModel()
        di_model_next.insert_pre = CoreSqlValue.insert_ig_it

        li_list = html_json['data']['list']
        for li in li_list:
            temp = info_dicts.copy()
            temp["task_tag"] = temp["task_tag_next"]
            del temp["task_tag_next"]
            article_json = dict()
            href = li['doc_pub_url']
            base_url = 'https://jyt.guizhou.gov.cn'
            if 'http' in href:
                url = href
            else:
                url = base_url + href
            if 'htm' not in url:
                continue
            # rawid is the article file name without its extension.
            rawid = re.findall(r'(.*?)\.', url.split('/')[-1])[0]
            temp["rawid"] = rawid
            temp["sub_db_id"] = '99588'
            article_json["url"] = url
            # f_202163974476 is the title field of the search API payload.
            article_json["title"] = li['f_202163974476']
            article_json["pub_date"] = li['save_time']
            temp["article_json"] = json.dumps(article_json, ensure_ascii=False)
            di_model_next.lists.append(temp)
        result.next_dicts.insert.append(di_model_next)

    return result


def policy_jytguizhouarticle_callback(callmodel: CallBackModel[PolicyArticleModel]) -> DealModel:
    """Article-stage callback for jyt.guizhou.gov.cn.

    No per-article processing happens at this stage; parsing is deferred to
    the ETL callback, so an empty DealModel is returned.
    """
    return DealModel()


def policy_jytguizhouarticle_etl_callback(callmodel) -> EtlDealModel:
    """ETL callback for jyt.guizhou.gov.cn article pages.

    Extracts policy metadata from the XxgkTypeBox info table plus inline JS
    variables, pulls the full text, builds `policy_latest` /
    `policy_fulltext_latest` rows, and writes attachment info back onto the
    source row.

    Raises:
        Exception: when no full text can be located.
    """
    result = EtlDealModel()
    save_data = list()

    html = callmodel.para_dicts['data']['1_1']['html']
    article_json = json.loads(callmodel.sql_model.article_json)
    title = article_json['title']
    provider_url = article_json['url']
    pub_date = clean_pubdate(article_json['pub_date'])
    pub_year = pub_date[:4]
    res = Selector(text=html)

    # Prefer the on-page title; fall back to the one captured at list time.
    title = ''.join(res.xpath('//div[@class="Article_bt"]//text()|//div[contains(@class,"DocTitle")]//text()').extract()).strip()
    if not title:
        title = article_json['title'].strip()
    pub_no = ''.join(res.xpath('//div[@class="XxgkTypeBox Box"]//td[contains(text(),"发文字号：")]/following::td[1]//text()').extract()).strip()
    subject = ''.join(res.xpath('//div[@class="XxgkTypeBox Box"]//td[contains(text(),"所属领域：")]/following::td[1]//text()').extract()).strip()
    written_date = ''.join(res.xpath('//div[@class="XxgkTypeBox Box"]//td[contains(text(),"成文日期：")]/following::td[1]//text()').extract()).strip()
    impl_date = ''.join(res.xpath('//div[@class="XxgkTypeBox Box"]//td[contains(text(),"施行日期：")]/following::td[1]//text()').extract()).strip()
    # Invalidation date is only available via an inline JS variable.
    invalid_date = re.findall(r"let FZRQ = '(.*?)'", html)
    invalid_date = invalid_date[0].strip() if invalid_date else ''
    legal_status = ''.join(res.xpath('//div[@class="XxgkTypeBox Box"]//td[contains(text(),"文件状态：")]/following::td[1]//text()').extract()).strip()
    organ = ''.join(res.xpath('//div[@class="XxgkTypeBox Box"]//td[contains(text(),"发文机关：")]/following::td[1]//text()').extract()).strip()
    if organ.startswith('省'):
        # Organ names starting with "省" (Provincial ...) get the province
        # prefix so they are unambiguous across sources.
        organ = '贵州' + organ

    fulltext_xpath = '//div[@id="Zoom"]'
    fulltext = res.xpath(fulltext_xpath).extract_first()
    if not fulltext:
        raise Exception(f'fulltext not found: {provider_url}')

    down_date_str = time.strftime('%Y%m%d_%H%M%S', time.localtime())
    sub_db_id = '99588'
    rawid = callmodel.sql_model.rawid
    lngid = BaseLngid().GetLngid(sub_db_id, rawid)
    product = "JYTGUIZHOU"
    zt_provider = "jytguizhougovpolicy"
    data = init_data(rawid, lngid, sub_db_id, down_date_str, product, zt_provider)
    print(lngid)

    data['title'] = title
    data['provider_url'] = provider_url
    data['pub_date'] = pub_date
    data['pub_year'] = pub_year
    data['pub_no'] = pub_no
    data['organ'] = organ
    data['written_date'] = clean_pubdate(written_date)
    data['impl_date'] = clean_pubdate(impl_date)
    data['invalid_date'] = clean_pubdate(invalid_date)
    data['subject'] = subject
    data['legal_status'] = legal_status

    save_data.append({'table': 'policy_latest', 'data': data})
    full_text_data = init_full_text_data(lngid, sub_db_id, down_date_str, fulltext, pub_year)
    save_data.append({'table': 'policy_fulltext_latest', 'data': full_text_data})
    result.save_data = save_data

    # Record attachment info (if any) back onto the originating task row.
    file_info = get_file_info(data, res, f'({fulltext_xpath})')
    di_model_bef = DealUpdateModel()
    if file_info:
        di_model_bef.update.update({"other_dicts": json.dumps(file_info, ensure_ascii=False)})
    else:
        di_model_bef.update.update({"other_dicts": "{}"})
    di_model_bef.where.update({"rawid": callmodel.sql_model.rawid, "task_tag": callmodel.sql_model.task_tag,
                               "task_name": callmodel.sql_model.task_name})
    result.befor_dicts.update_list.append(di_model_bef)
    return result


#   贵州省民政厅 (Guizhou Provincial Department of Civil Affairs)
def policy_mztguizhoulist_callback(callmodel: CallBackModel[PolicyListModel]) -> DealModel:
    """List-page callback for static list pages on mzt.guizhou.gov.cn.

    On the first page (page_index == 0) schedules tasks for the remaining
    pagination pages, then emits one article-stage task per list entry.
    """
    result = DealModel()
    para_dicts = callmodel.para_dicts
    task_info = callmodel.redis_all.parse_dict["1_1"].task_info
    info_dicts = {"task_name": callmodel.sql_model.task_name,
                  "task_tag": callmodel.sql_model.task_tag,
                  "task_tag_next": task_info.task_tag_next}
    if "1_1" in para_dicts["data"]:
        # Total page count is embedded in a createPageHTML(...) JS call.
        max_count = re.findall(r'createPageHTML\((\d+)', para_dicts["data"]["1_1"]['html'])
        total_page = int(max_count[0]) if max_count else 1
        page_index = int(callmodel.sql_model.page_index)
        if page_index == 0:
            sql_dict = callmodel.sql_model.dict()
            di_model_bef = DealInsertModel()
            di_model_bef.insert_pre = CoreSqlValue.insert_ig_it
            sql_dict = deal_sql_dict(sql_dict)
            list_json = json.loads(callmodel.sql_model.list_json)
            for page in range(page_index + 1, total_page):
                sql_dict["page"] = total_page
                sql_dict["page_index"] = page
                dic = {"page_info": f"{list_json['page_info']}_{page}"}
                sql_dict["list_json"] = json.dumps(dic, ensure_ascii=False)
                di_model_bef.lists.append(sql_dict.copy())
            result.befor_dicts.insert.append(di_model_bef)
        di_model_next = DealInsertModel()
        di_model_next.insert_pre = CoreSqlValue.insert_ig_it
        res = Selector(text=para_dicts["data"]["1_1"]['html'])
        li_list = res.xpath('//div[@class="NewsList"]/ul/li|//div[@class="zfxxgk_zdgkc"]/ul/li')
        for li in li_list:
            temp = info_dicts.copy()
            temp["task_tag"] = temp["task_tag_next"]
            del temp["task_tag_next"]
            article_json = dict()
            href = li.xpath('a/@href').extract_first()
            if not href:
                # Skip items without a link (consistent with the czt
                # list callback).
                continue
            base_url = 'http://mzt.guizhou.gov.cn'
            if 'http' in href:
                url = href
            else:
                url = base_url + href
            if 'htm' not in url:
                continue
            # rawid is the article file name without its extension.
            rawid = re.findall(r'(.*?)\.', url.split('/')[-1])[0]
            temp["rawid"] = rawid
            temp["sub_db_id"] = '99589'
            article_json["url"] = url
            article_json["title"] = li.xpath('a/text()').extract_first().strip()
            article_json["pub_date"] = li.xpath('span/text()|b/text()').extract_first().strip()
            temp["article_json"] = json.dumps(article_json, ensure_ascii=False)
            di_model_next.lists.append(temp)
        result.next_dicts.insert.append(di_model_next)

    return result


def policy_mztguizhoulist1_callback(callmodel: CallBackModel[PolicyListModel]) -> DealModel:
    """List callback for the JSON (search-API) list pages of mzt.guizhou.gov.cn.

    On page 1 schedules tasks for every page reported by the API pager; each
    response entry then becomes one article-stage task.
    """
    result = DealModel()
    para_dicts = callmodel.para_dicts
    task_info = callmodel.redis_all.parse_dict["1_1"].task_info
    info_dicts = {"task_name": callmodel.sql_model.task_name,
                  "task_tag": callmodel.sql_model.task_tag,
                  "task_tag_next": task_info.task_tag_next}
    if "1_1" in para_dicts["data"]:
        html_json = json.loads(para_dicts["data"]["1_1"]['html'])
        total_page = html_json['data']['pager']['pageCount']
        page_index = int(callmodel.sql_model.page_index)
        if page_index == 1:
            sql_dict = callmodel.sql_model.dict()
            di_model_bef = DealInsertModel()
            di_model_bef.insert_pre = CoreSqlValue.insert_ig_it
            sql_dict = deal_sql_dict(sql_dict)
            for page in range(1, total_page + 1):
                sql_dict["page"] = total_page
                sql_dict["page_index"] = page
                # The request body is identical for every page; only
                # page_index varies.
                sql_dict["list_json"] = callmodel.sql_model.list_json
                di_model_bef.lists.append(sql_dict.copy())
            result.befor_dicts.insert.append(di_model_bef)
        di_model_next = DealInsertModel()
        di_model_next.insert_pre = CoreSqlValue.insert_ig_it

        li_list = html_json['data']['list']
        for li in li_list:
            temp = info_dicts.copy()
            temp["task_tag"] = temp["task_tag_next"]
            del temp["task_tag_next"]
            article_json = dict()
            href = li['doc_pub_url']
            base_url = 'http://mzt.guizhou.gov.cn'
            if 'http' in href:
                url = href
            else:
                url = base_url + href
            if 'htm' not in url:
                continue
            # rawid is the article file name without its extension.
            rawid = re.findall(r'(.*?)\.', url.split('/')[-1])[0]
            temp["rawid"] = rawid
            temp["sub_db_id"] = '99589'
            article_json["url"] = url
            # f_202163974476 is the title field of the search API payload.
            article_json["title"] = li['f_202163974476']
            article_json["pub_date"] = li['save_time']
            temp["article_json"] = json.dumps(article_json, ensure_ascii=False)
            di_model_next.lists.append(temp)
        result.next_dicts.insert.append(di_model_next)

    return result


def policy_mztguizhouarticle_callback(callmodel: CallBackModel[PolicyArticleModel]) -> DealModel:
    """Article-stage callback for mzt.guizhou.gov.cn.

    No per-article processing happens at this stage; parsing is deferred to
    the ETL callback, so an empty DealModel is returned.
    """
    return DealModel()


def policy_mztguizhouarticle_etl_callback(callmodel) -> EtlDealModel:
    """ETL callback for mzt.guizhou.gov.cn article pages.

    Extracts policy metadata from the XxgkTypeBox info table plus inline JS
    variables, pulls the full text, builds `policy_latest` /
    `policy_fulltext_latest` rows, and writes attachment info back onto the
    source row.

    Raises:
        Exception: when no full text can be located.
    """
    result = EtlDealModel()
    save_data = list()

    html = callmodel.para_dicts['data']['1_1']['html']
    article_json = json.loads(callmodel.sql_model.article_json)
    title = article_json['title']
    provider_url = article_json['url']
    pub_date = clean_pubdate(article_json['pub_date'])
    pub_year = pub_date[:4]
    res = Selector(text=html)

    # Prefer the on-page title; fall back to the one captured at list time.
    title = ''.join(res.xpath('//div[@class="zwxxgk_ndbgwz"]//h1//text()|//div[@class="title"]//h1//text()|//div[contains(@class,"DocTitle")]//text()').extract()).strip()
    if not title:
        title = article_json['title'].strip()
    pub_no = ''.join(res.xpath('//div[@class="XxgkTypeBox Box"]//td[contains(text(),"发文字号：")]/following::td[1]//text()').extract()).strip()
    subject = ''.join(res.xpath('//div[@class="XxgkTypeBox Box"]//td[contains(text(),"所属领域：")]/following::td[1]//text()').extract()).strip()
    written_date = ''.join(res.xpath('//div[@class="XxgkTypeBox Box"]//td[contains(text(),"成文日期：")]/following::td[1]//text()').extract()).strip()
    impl_date = ''.join(res.xpath('//div[@class="XxgkTypeBox Box"]//td[contains(text(),"施行日期：")]/following::td[1]//text()').extract()).strip()
    # Invalidation date is only available via an inline JS variable.
    invalid_date = re.findall(r"let FZRQ = '(.*?)'", html)
    invalid_date = invalid_date[0].strip() if invalid_date else ''
    legal_status = ''.join(res.xpath('//div[@class="XxgkTypeBox Box"]//td[contains(text(),"文件状态：")]/following::td[1]//text()').extract()).strip()
    organ = ''.join(res.xpath('//div[@class="XxgkTypeBox Box"]//td[contains(text(),"发文机关：")]/following::td[1]//text()').extract()).strip()
    if organ.startswith('省'):
        # Organ names starting with "省" (Provincial ...) get the province
        # prefix so they are unambiguous across sources.
        organ = '贵州' + organ

    fulltext_xpath = '//div[@id="Zoom"]|//font[@id="Zoom"]'
    fulltext = res.xpath(fulltext_xpath).extract_first()
    if not fulltext:
        raise Exception(f'fulltext not found: {provider_url}')

    down_date_str = time.strftime('%Y%m%d_%H%M%S', time.localtime())
    sub_db_id = '99589'
    rawid = callmodel.sql_model.rawid
    lngid = BaseLngid().GetLngid(sub_db_id, rawid)
    product = "MZTGUIZHOU"
    zt_provider = "mztguizhougovpolicy"
    data = init_data(rawid, lngid, sub_db_id, down_date_str, product, zt_provider)
    print(lngid)

    data['title'] = title
    data['provider_url'] = provider_url
    data['pub_date'] = pub_date
    data['pub_year'] = pub_year
    data['pub_no'] = pub_no
    data['organ'] = organ
    data['written_date'] = clean_pubdate(written_date)
    data['impl_date'] = clean_pubdate(impl_date)
    data['invalid_date'] = clean_pubdate(invalid_date)
    data['subject'] = subject
    data['legal_status'] = legal_status

    save_data.append({'table': 'policy_latest', 'data': data})
    full_text_data = init_full_text_data(lngid, sub_db_id, down_date_str, fulltext, pub_year)
    save_data.append({'table': 'policy_fulltext_latest', 'data': full_text_data})
    result.save_data = save_data

    # Record attachment info (if any) back onto the originating task row.
    file_info = get_file_info(data, res, f'({fulltext_xpath})')
    di_model_bef = DealUpdateModel()
    if file_info:
        di_model_bef.update.update({"other_dicts": json.dumps(file_info, ensure_ascii=False)})
    else:
        di_model_bef.update.update({"other_dicts": "{}"})
    di_model_bef.where.update({"rawid": callmodel.sql_model.rawid, "task_tag": callmodel.sql_model.task_tag,
                               "task_name": callmodel.sql_model.task_name})
    result.befor_dicts.update_list.append(di_model_bef)
    return result


#   贵州省财政厅 (Guizhou Provincial Department of Finance)
def policy_cztguizhoulist_callback(callmodel: CallBackModel[PolicyListModel]) -> DealModel:
    """List-page callback for static list pages on czt.guizhou.gov.cn.

    On the first page (page_index == 0) schedules tasks for the remaining
    pagination pages, then emits one article-stage task per list entry.
    """
    result = DealModel()
    para_dicts = callmodel.para_dicts
    task_info = callmodel.redis_all.parse_dict["1_1"].task_info
    info_dicts = {"task_name": callmodel.sql_model.task_name,
                  "task_tag": callmodel.sql_model.task_tag,
                  "task_tag_next": task_info.task_tag_next}
    if "1_1" in para_dicts["data"]:
        # Total page count is embedded in a createPageHTML(...) JS call.
        max_count = re.findall(r'createPageHTML\((\d+)', para_dicts["data"]["1_1"]['html'])
        total_page = int(max_count[0]) if max_count else 1
        page_index = int(callmodel.sql_model.page_index)
        if page_index == 0:
            sql_dict = callmodel.sql_model.dict()
            di_model_bef = DealInsertModel()
            di_model_bef.insert_pre = CoreSqlValue.insert_ig_it
            sql_dict = deal_sql_dict(sql_dict)
            list_json = json.loads(callmodel.sql_model.list_json)
            for page in range(page_index + 1, total_page):
                sql_dict["page"] = total_page
                sql_dict["page_index"] = page
                dic = {"page_info": f"{list_json['page_info']}_{page}"}
                sql_dict["list_json"] = json.dumps(dic, ensure_ascii=False)
                di_model_bef.lists.append(sql_dict.copy())
            result.befor_dicts.insert.append(di_model_bef)
        di_model_next = DealInsertModel()
        di_model_next.insert_pre = CoreSqlValue.insert_ig_it
        res = Selector(text=para_dicts["data"]["1_1"]['html'])
        li_list = res.xpath('//ul[@class="NewsList"]/li|//div[@class="zfxxgk_zdgkc"]/ul/li|//div[@class="zcjd_list"]/ul/li')
        for li in li_list:
            temp = info_dicts.copy()
            temp["task_tag"] = temp["task_tag_next"]
            del temp["task_tag_next"]
            article_json = dict()
            href = li.xpath('h3/a/@href|a/@href').extract_first()
            if not href:
                # Some <li> items carry no link (e.g. decorative rows).
                continue
            base_url = 'http://czt.guizhou.gov.cn'
            if 'http' in href:
                url = href
            else:
                url = base_url + href
            if 'htm' not in url:
                continue
            # rawid is the article file name without its extension.
            rawid = re.findall(r'(.*?)\.', url.split('/')[-1])[0]
            temp["rawid"] = rawid
            temp["sub_db_id"] = '99590'
            article_json["url"] = url
            article_json["title"] = li.xpath('h3/a/text()|a/text()').extract_first().strip()
            article_json["pub_date"] = li.xpath('h3/span/text()|span/text()|b/text()').extract_first().strip()
            temp["article_json"] = json.dumps(article_json, ensure_ascii=False)
            di_model_next.lists.append(temp)
        result.next_dicts.insert.append(di_model_next)

    return result


def policy_cztguizhouarticle_callback(callmodel: CallBackModel[PolicyArticleModel]) -> DealModel:
    """Article-stage callback for czt.guizhou.gov.cn.

    No per-article processing happens at this stage; parsing is deferred to
    the ETL callback, so an empty DealModel is returned.
    """
    return DealModel()


def policy_cztguizhouarticle_etl_callback(callmodel) -> EtlDealModel:
    """ETL callback for Guizhou Department of Finance (czt) article pages.

    Parses the article HTML into metadata fields (title, document number,
    index number, subject, written date, legal status, issuing organ), builds
    rows for the ``policy_latest`` and ``policy_fulltext_latest`` tables, and
    queues an update writing attachment info back to the source record.

    :param callmodel: callback model; ``para_dicts['data']['1_1']['html']``
        holds the page HTML and ``sql_model.article_json`` the metadata
        captured on the list page.
    :return: populated EtlDealModel.
    :raises Exception: when no full-text node is found in the page.
    """
    result = EtlDealModel()
    save_data = list()

    html = callmodel.para_dicts['data']['1_1']['html']
    article_json = json.loads(callmodel.sql_model.article_json)
    provider_url = article_json['url']
    pub_date = clean_pubdate(article_json['pub_date'])
    pub_year = pub_date[:4]
    res = Selector(text=html)

    # Prefer the on-page title; fall back to the title captured on the list page.
    title = ''.join(res.xpath('//div[@class="zwxxgk_ndbgwz"]//h1//text()|//div[@class="ArticleTitle"]//text()|//div[@class="Article_bt"]//text()').extract()).strip()
    if not title:
        title = article_json['title'].strip()
    if 'layui-table' in html:
        # Page template A: metadata in a layui table plus inline JS variables.
        pub_no = ''.join(res.xpath('//table[@class="layui-table"]//strong[contains(text(),"文　　号：")]/following::td[1]//text()').extract()).strip()
        index_no = ''.join(res.xpath('//table[@class="layui-table"]//strong[contains(text(),"索 引 号：")]/following::td[1]//text()').extract()).strip()
        subject = re.findall(r"var xxfl = '(.*?)'", html)
        if not subject:
            subject = re.findall(r"\? xxfl='(.*?)'", html)
        subject = subject[0].strip() if subject else ''
        written_date = re.findall(r'var cwrq="(.*?)"', html)
        written_date = written_date[0].strip() if written_date else ''
        legal_status = re.findall(r"var  isok='(.*?)'", html)
        legal_status = '有效' if legal_status and legal_status[0].strip() == '是' else ''
        organ = re.findall(r'var str = "(.*?)"', html)
        if not organ:
            organ = re.findall(r'var str_1 = "(.*?)"', html)
        organ = organ[0].strip() if organ else ''
    else:
        # Page template B: metadata in the xx_fl / XxgkTypeBox markup.
        pub_no = ''.join(res.xpath('//div[@class="xx_fl"]//strong[contains(text(),"文　　号：")]/following::td[1]//text()').extract()).strip()
        index_no = ''.join(res.xpath('//div[@class="xx_fl"]//strong[contains(text(),"索 引 号：")]/following::td[1]//text()').extract()).strip()
        subject = ''.join(res.xpath('//div[@class="xx_fl"]//strong[contains(text(),"信息分类：")]/following::td[1]//text()').extract()).strip()
        written_date = ''.join(res.xpath('//div[@class="xx_fl"]//strong[contains(text(),"生成日期：")]/following::td[1]//text()').extract()).strip()
        legal_status = re.findall(r"var  isok='(.*?)'", html)
        legal_status = '有效' if legal_status and legal_status[0].strip() == '是' else ''
        organ = ''.join(res.xpath('//div[@class="XxgkTypeBox Box"]//td[contains(text(),"发文机关：")]/following::td[1]//text()').extract()).strip()
    if organ.startswith('省'):
        # Provincial organs appear on-site without the province prefix.
        organ = '贵州' + organ

    fulltext_xpath = '//div[@id="Zoom"]|//font[@id="Zoom"]'
    fulltext = res.xpath(fulltext_xpath).extract_first()
    if not fulltext:
        raise Exception(f'fulltext node not found: {provider_url}')

    down_date_str = time.strftime('%Y%m%d_%H%M%S', time.localtime())
    sub_db_id = '99590'
    rawid = callmodel.sql_model.rawid
    lngid = BaseLngid().GetLngid(sub_db_id, rawid)
    product = "CZTGUIZHOU"
    zt_provider = "cztguizhougovpolicy"
    data = init_data(rawid, lngid, sub_db_id, down_date_str, product, zt_provider)

    data['title'] = title
    data['provider_url'] = provider_url
    data['pub_date'] = pub_date
    data['pub_year'] = pub_year
    data['pub_no'] = pub_no
    data['organ'] = organ
    data['index_no'] = index_no
    data['written_date'] = clean_pubdate(written_date)
    data['subject'] = subject
    data['legal_status'] = legal_status

    save_data.append({'table': 'policy_latest', 'data': data})
    full_text_data = init_full_text_data(lngid, sub_db_id, down_date_str, fulltext, pub_year)
    save_data.append({'table': 'policy_fulltext_latest', 'data': full_text_data})
    result.save_data = save_data

    # Write attachment info (if any) back to the source article record.
    file_info = get_file_info(data, res, f'({fulltext_xpath})')
    di_model_bef = DealUpdateModel()
    if file_info:
        di_model_bef.update.update({"other_dicts": json.dumps(file_info, ensure_ascii=False)})
    else:
        di_model_bef.update.update({"other_dicts": "{}"})
    di_model_bef.where.update({"rawid": callmodel.sql_model.rawid, "task_tag": callmodel.sql_model.task_tag,
                               "task_name": callmodel.sql_model.task_name})
    result.befor_dicts.update_list.append(di_model_bef)
    return result


#   贵州省人力资源和社会保障厅 (Guizhou Provincial Department of Human Resources and Social Security)
def policy_rstguizhoulist_callback(callmodel: CallBackModel[PolicyListModel]) -> DealModel:
    """List-page callback for the Guizhou Department of Human Resources and
    Social Security (rst), static-HTML listings.

    On the first page (page_index == 0) the total page count is read from the
    ``createPageHTML(...)`` pagination script and the remaining list pages are
    queued; every article link on the current page is then queued as an
    article task.

    :param callmodel: callback model carrying the fetched list HTML and task info.
    :return: DealModel with queued list pages (befor_dicts) and article tasks
        (next_dicts).
    """
    result = DealModel()
    para_dicts = callmodel.para_dicts
    task_info = callmodel.redis_all.parse_dict["1_1"].task_info
    info_dicts = {"task_name": callmodel.sql_model.task_name,
                  "task_tag": callmodel.sql_model.task_tag,
                  "task_tag_next": task_info.task_tag_next}
    if "1_1" in para_dicts["data"]:
        # Total page count is embedded in the pagination JS call; default to 1.
        max_count = re.findall(r'createPageHTML\((\d+)', para_dicts["data"]["1_1"]['html'])
        max_count = int(max_count[0]) if max_count else 1
        total_page = max_count
        page_index = int(callmodel.sql_model.page_index)
        if page_index == 0:
            # First page: fan out tasks for the remaining list pages.
            sql_dict = callmodel.sql_model.dict()
            di_model_bef = DealInsertModel()
            di_model_bef.insert_pre = CoreSqlValue.insert_ig_it
            sql_dict = deal_sql_dict(sql_dict)
            list_json = json.loads(callmodel.sql_model.list_json)
            for page in range(page_index + 1, total_page):
                sql_dict["page"] = total_page
                sql_dict["page_index"] = page
                dic = {"page_info": f"{list_json['page_info']}_{page}"}
                sql_dict["list_json"] = json.dumps(dic, ensure_ascii=False)
                di_model_bef.lists.append(sql_dict.copy())
            result.befor_dicts.insert.append(di_model_bef)
        di_model_next = DealInsertModel()
        di_model_next.insert_pre = CoreSqlValue.insert_ig_it
        res = Selector(text=para_dicts["data"]["1_1"]['html'])
        li_list = res.xpath('//div[@class="right-list-box"]/ul/li|//div[@class="zfxxgk_zdgkc"]/ul/li')
        for li in li_list:
            temp = info_dicts.copy()
            temp["task_tag"] = temp["task_tag_next"]
            del temp["task_tag_next"]
            article_json = dict()
            href = li.xpath('a/@href').extract_first()
            if not href:
                # Skip list items without a link, mirroring the sibling
                # Guizhou list callbacks ('http' in None would raise).
                continue
            base_url = 'https://rst.guizhou.gov.cn'
            if 'http' in href:
                url = href
            else:
                url = base_url + href
            if 'htm' not in url:
                continue
            # rawid is the file name of the article URL without its extension.
            rawid = re.findall(r'(.*?)\.', url.split('/')[-1])[0]
            temp["rawid"] = rawid
            temp["sub_db_id"] = '99591'
            article_json["url"] = url
            article_json["title"] = li.xpath('a/@title').extract_first().strip()
            article_json["pub_date"] = li.xpath('span/text()|b/text()').extract_first().strip()
            temp["article_json"] = json.dumps(article_json, ensure_ascii=False)
            di_model_next.lists.append(temp)
        result.next_dicts.insert.append(di_model_next)

    return result


def policy_rstguizhoulist1_callback(callmodel: CallBackModel[PolicyListModel]) -> DealModel:
    """List-page callback for the rst site's JSON-API listings.

    The list response is JSON rather than HTML: the pager's ``pageCount``
    drives fan-out of the remaining pages (on page_index == 1), and each entry
    of ``data.list`` is queued as an article task.

    :param callmodel: callback model carrying the fetched JSON and task info.
    :return: DealModel with queued list pages (befor_dicts) and article tasks
        (next_dicts).
    """
    result = DealModel()
    para_dicts = callmodel.para_dicts
    task_info = callmodel.redis_all.parse_dict["1_1"].task_info
    info_dicts = {"task_name": callmodel.sql_model.task_name,
                  "task_tag": callmodel.sql_model.task_tag,
                  "task_tag_next": task_info.task_tag_next}
    if "1_1" in para_dicts["data"]:
        html_json = json.loads(para_dicts["data"]["1_1"]['html'])
        total_page = html_json['data']['pager']['pageCount']
        page_index = int(callmodel.sql_model.page_index)
        if page_index == 1:
            # First page: fan out tasks for all pages; the page number is
            # carried in page_index, so list_json is reused unchanged.
            sql_dict = callmodel.sql_model.dict()
            di_model_bef = DealInsertModel()
            di_model_bef.insert_pre = CoreSqlValue.insert_ig_it
            sql_dict = deal_sql_dict(sql_dict)
            for page in range(1, total_page + 1):
                sql_dict["page"] = total_page
                sql_dict["page_index"] = page
                sql_dict["list_json"] = callmodel.sql_model.list_json
                di_model_bef.lists.append(sql_dict.copy())
            result.befor_dicts.insert.append(di_model_bef)
        di_model_next = DealInsertModel()
        di_model_next.insert_pre = CoreSqlValue.insert_ig_it

        li_list = html_json['data']['list']
        for li in li_list:
            temp = info_dicts.copy()
            temp["task_tag"] = temp["task_tag_next"]
            del temp["task_tag_next"]
            article_json = dict()
            href = li['doc_pub_url']
            base_url = 'https://rst.guizhou.gov.cn'
            if 'http' in href:
                url = href
            else:
                url = base_url + href
            if 'htm' not in url:
                continue
            # rawid is the file name of the article URL without its extension.
            rawid = re.findall(r'(.*?)\.', url.split('/')[-1])[0]
            temp["rawid"] = rawid
            temp["sub_db_id"] = '99591'
            article_json["url"] = url
            article_json["title"] = li['f_202163974476']
            article_json["pub_date"] = li['save_time']
            temp["article_json"] = json.dumps(article_json, ensure_ascii=False)
            di_model_next.lists.append(temp)
        result.next_dicts.insert.append(di_model_next)

    return result


def policy_rstguizhouarticle_callback(callmodel: CallBackModel[PolicyArticleModel]) -> DealModel:
    """Article-fetch callback for the rst site.

    Parsing happens in the ETL callback, so this simply returns an empty
    DealModel.
    """
    return DealModel()


def policy_rstguizhouarticle_etl_callback(callmodel) -> EtlDealModel:
    """ETL callback for rst (Human Resources and Social Security) article pages.

    Parses the article HTML into metadata fields (title, document number,
    index number, subject, written/implementation/invalidation dates, legal
    status, issuing organ), builds rows for ``policy_latest`` and
    ``policy_fulltext_latest``, and queues an update writing attachment info
    back to the source record.

    :param callmodel: callback model; ``para_dicts['data']['1_1']['html']``
        holds the page HTML and ``sql_model.article_json`` the metadata
        captured on the list page.
    :return: populated EtlDealModel.
    :raises Exception: when no full-text node is found in the page.
    """
    result = EtlDealModel()
    save_data = list()

    html = callmodel.para_dicts['data']['1_1']['html']
    article_json = json.loads(callmodel.sql_model.article_json)
    provider_url = article_json['url']
    pub_date = clean_pubdate(article_json['pub_date'])
    pub_year = pub_date[:4]
    res = Selector(text=html)

    # Prefer the on-page title; fall back to the title captured on the list page.
    title = ''.join(res.xpath('//div[@class="zwxxgk_ndbgwz"]//h1//text()|//div[@class="Article_bt"]//text()|//div[contains(@class,"DocTitle")]//text()').extract()).strip()
    if not title:
        title = article_json['title'].strip()
    if 'layui-table' in html:
        # Page template A: metadata in a layui table plus inline JS variables.
        pub_no = ''.join(res.xpath('//table[@class="layui-table"]//strong[contains(text(),"文　　号：")]/following::td[1]//text()').extract()).strip()
        index_no = ''.join(res.xpath('//table[@class="layui-table"]//strong[contains(text(),"索 引 号：")]/following::td[1]//text()').extract()).strip()
        subject = re.findall(r"var xxfl = '(.*?)'", html)
        if not subject:
            subject = re.findall(r"\? xxfl='(.*?)'", html)
        subject = subject[0].strip() if subject else ''
        written_date = re.findall(r'var cwrq="(.*?)"', html)
        written_date = written_date[0].strip() if written_date else ''
        impl_date = ''
        invalid_date = ''
        legal_status = re.findall(r"var  isok='(.*?)'", html)
        legal_status = '有效' if legal_status and legal_status[0].strip() == '是' else ''
        organ = re.findall(r'var str = "(.*?)"', html)
        if not organ:
            organ = re.findall(r'var str_1 = "(.*?)"', html)
        organ = organ[0].strip() if organ else ''
    else:
        # Page template B: metadata in the XxgkTypeBox markup.
        pub_no = ''.join(res.xpath('//div[@class="XxgkTypeBox Box"]//td[contains(text(),"发文字号：")]/following::td[1]//text()').extract()).strip()
        index_no = ''
        subject = ''.join(res.xpath('//div[@class="XxgkTypeBox Box"]//td[contains(text(),"所属领域：")]/following::td[1]//text()').extract()).strip()
        written_date = ''.join(res.xpath('//div[@class="XxgkTypeBox Box"]//td[contains(text(),"成文日期：")]/following::td[1]//text()').extract()).strip()
        impl_date = ''.join(res.xpath('//div[@class="XxgkTypeBox Box"]//td[contains(text(),"施行日期：")]/following::td[1]//text()').extract()).strip()
        invalid_date = re.findall(r"let FZRQ = '(.*?)'", html)
        invalid_date = invalid_date[0].strip() if invalid_date else ''
        legal_status = ''.join(res.xpath('//div[@class="XxgkTypeBox Box"]//td[contains(text(),"文件状态：")]/following::td[1]//text()').extract()).strip()
        organ = ''.join(res.xpath('//div[@class="XxgkTypeBox Box"]//td[contains(text(),"发文机关：")]/following::td[1]//text()').extract()).strip()
    if organ.startswith('省'):
        # Provincial organs appear on-site without the province prefix.
        organ = '贵州' + organ

    fulltext_xpath = '//div[@id="Zoom"]|//font[@id="Zoom"]'
    fulltext = res.xpath(fulltext_xpath).extract_first()
    if not fulltext:
        raise Exception(f'fulltext node not found: {provider_url}')

    down_date_str = time.strftime('%Y%m%d_%H%M%S', time.localtime())
    sub_db_id = '99591'
    rawid = callmodel.sql_model.rawid
    lngid = BaseLngid().GetLngid(sub_db_id, rawid)
    product = "RSTGUIZHOU"
    zt_provider = "rstguizhougovpolicy"
    data = init_data(rawid, lngid, sub_db_id, down_date_str, product, zt_provider)

    data['title'] = title
    data['provider_url'] = provider_url
    data['pub_date'] = pub_date
    data['pub_year'] = pub_year
    data['pub_no'] = pub_no
    data['organ'] = organ
    data['index_no'] = index_no
    data['written_date'] = clean_pubdate(written_date)
    data['impl_date'] = clean_pubdate(impl_date)
    data['invalid_date'] = clean_pubdate(invalid_date)
    data['subject'] = subject
    data['legal_status'] = legal_status

    save_data.append({'table': 'policy_latest', 'data': data})
    full_text_data = init_full_text_data(lngid, sub_db_id, down_date_str, fulltext, pub_year)
    save_data.append({'table': 'policy_fulltext_latest', 'data': full_text_data})
    result.save_data = save_data

    # Write attachment info (if any) back to the source article record.
    file_info = get_file_info(data, res, f'({fulltext_xpath})')
    di_model_bef = DealUpdateModel()
    if file_info:
        di_model_bef.update.update({"other_dicts": json.dumps(file_info, ensure_ascii=False)})
    else:
        di_model_bef.update.update({"other_dicts": "{}"})
    di_model_bef.where.update({"rawid": callmodel.sql_model.rawid, "task_tag": callmodel.sql_model.task_tag,
                               "task_name": callmodel.sql_model.task_name})
    result.befor_dicts.update_list.append(di_model_bef)
    return result


#   贵州省农业农村厅 (Guizhou Provincial Department of Agriculture and Rural Affairs)
def policy_nynctguizhoulist_callback(callmodel: CallBackModel[PolicyListModel]) -> DealModel:
    """List-page callback for the Guizhou Department of Agriculture and Rural
    Affairs (nynct).

    On the first page (page_index == 0) the total page count is read from the
    ``createPageHTML(...)`` pagination script and the remaining list pages are
    queued; every article link on the current page (table rows or plain list
    items) is then queued as an article task.

    :param callmodel: callback model carrying the fetched list HTML and task info.
    :return: DealModel with queued list pages (befor_dicts) and article tasks
        (next_dicts).
    """
    result = DealModel()
    para_dicts = callmodel.para_dicts
    task_info = callmodel.redis_all.parse_dict["1_1"].task_info
    info_dicts = {"task_name": callmodel.sql_model.task_name,
                  "task_tag": callmodel.sql_model.task_tag,
                  "task_tag_next": task_info.task_tag_next}
    if "1_1" in para_dicts["data"]:
        # Total page count is embedded in the pagination JS call; default to 1.
        max_count = re.findall(r'createPageHTML\((\d+)', para_dicts["data"]["1_1"]['html'])
        max_count = int(max_count[0]) if max_count else 1
        total_page = max_count
        page_index = int(callmodel.sql_model.page_index)
        if page_index == 0:
            # First page: fan out tasks for the remaining list pages.
            sql_dict = callmodel.sql_model.dict()
            di_model_bef = DealInsertModel()
            di_model_bef.insert_pre = CoreSqlValue.insert_ig_it
            sql_dict = deal_sql_dict(sql_dict)
            list_json = json.loads(callmodel.sql_model.list_json)
            for page in range(page_index + 1, total_page):
                sql_dict["page"] = total_page
                sql_dict["page_index"] = page
                dic = {"page_info": f"{list_json['page_info']}_{page}"}
                sql_dict["list_json"] = json.dumps(dic, ensure_ascii=False)
                di_model_bef.lists.append(sql_dict.copy())
            result.befor_dicts.insert.append(di_model_bef)
        di_model_next = DealInsertModel()
        di_model_next.insert_pre = CoreSqlValue.insert_ig_it
        res = Selector(text=para_dicts["data"]["1_1"]['html'])
        li_list = res.xpath('//tbody[@id="idData"]/tr|//div[@class="listInfo"]/ul/li')
        for li in li_list:
            temp = info_dicts.copy()
            temp["task_tag"] = temp["task_tag_next"]
            del temp["task_tag_next"]
            article_json = dict()
            href = li.xpath('td[1]/a/@href|a/@href').extract_first()
            if not href:
                # Skip rows without a link, mirroring the sibling Guizhou
                # list callbacks ('http' in None would raise).
                continue
            base_url = 'http://nynct.guizhou.gov.cn'
            if 'http' in href:
                url = href
            else:
                url = base_url + href
            if 'htm' not in url:
                continue
            # rawid is the file name of the article URL without its extension.
            rawid = re.findall(r'(.*?)\.', url.split('/')[-1])[0]
            temp["rawid"] = rawid
            temp["sub_db_id"] = '99592'
            article_json["url"] = url
            article_json["title"] = li.xpath('td[1]/a/text()|a/text()').extract_first().strip()
            article_json["pub_date"] = li.xpath('td[2]/text()|span/text()').extract_first().strip()
            temp["article_json"] = json.dumps(article_json, ensure_ascii=False)
            di_model_next.lists.append(temp)
        result.next_dicts.insert.append(di_model_next)

    return result


def policy_nynctguizhouarticle_callback(callmodel: CallBackModel[PolicyArticleModel]) -> DealModel:
    """Article-fetch callback for the nynct site.

    Parsing happens in the ETL callback, so this simply returns an empty
    DealModel.
    """
    return DealModel()


def policy_nynctguizhouarticle_etl_callback(callmodel) -> EtlDealModel:
    """ETL callback for nynct (Agriculture and Rural Affairs) article pages.

    Parses the article HTML into metadata fields (title, document number,
    index number, subject, legal status, issuing organ), builds rows for
    ``policy_latest`` and ``policy_fulltext_latest``, and queues an update
    writing attachment info back to the source record.

    :param callmodel: callback model; ``para_dicts['data']['1_1']['html']``
        holds the page HTML and ``sql_model.article_json`` the metadata
        captured on the list page.
    :return: populated EtlDealModel.
    :raises Exception: when no title or no full-text node is found.
    """
    result = EtlDealModel()
    save_data = list()

    html = callmodel.para_dicts['data']['1_1']['html']
    article_json = json.loads(callmodel.sql_model.article_json)
    provider_url = article_json['url']
    pub_date = clean_pubdate(article_json['pub_date'])
    pub_year = pub_date[:4]
    res = Selector(text=html)

    # Prefer the on-page title; fall back to the title captured on the list page.
    title = ''.join(res.xpath('//div[@class="title"]//h1//text()').extract()).strip()
    if not title:
        title = article_json['title'].strip()
    if not title:
        raise Exception(f'title not found: {provider_url}')
    pub_no = ''.join(res.xpath('//div[@class="xxgk_xl_top"]//a[contains(text(),"文号:")]/following::span[1]//text()').extract()).strip()
    index_no = ''.join(res.xpath('//div[@class="xxgk_xl_top"]//a[contains(text(),"索引号:")]/following::span[1]//text()').extract()).strip()
    subject = ''.join(res.xpath('//div[@class="xxgk_xl_top"]//a[contains(text(),"信息分类:")]/following::span[1]//text()').extract()).strip()
    # Legal status and issuing organ are embedded as inline JS variables.
    legal_status = re.findall(r"var  isok='(.*?)'", html)
    legal_status = '有效' if legal_status and legal_status[0].strip() == '是' else ''
    organ = re.findall(r'var str = "(.*?)"', html)
    if not organ:
        organ = re.findall(r'var str_1 = "(.*?)"', html)
    organ = organ[0].strip() if organ else ''
    if organ.startswith('省'):
        # Provincial organs appear on-site without the province prefix.
        organ = '贵州' + organ

    fulltext_xpath = '//div[@id="Zoom"]|//font[@id="Zoom"]'
    fulltext = res.xpath(fulltext_xpath).extract_first()
    if not fulltext:
        raise Exception(f'fulltext node not found: {provider_url}')

    down_date_str = time.strftime('%Y%m%d_%H%M%S', time.localtime())
    sub_db_id = '99592'
    rawid = callmodel.sql_model.rawid
    lngid = BaseLngid().GetLngid(sub_db_id, rawid)
    product = "NYNCTGUIZHOU"
    zt_provider = "nynctguizhougovpolicy"
    data = init_data(rawid, lngid, sub_db_id, down_date_str, product, zt_provider)

    data['title'] = title
    data['provider_url'] = provider_url
    data['pub_date'] = pub_date
    data['pub_year'] = pub_year
    data['pub_no'] = pub_no
    data['organ'] = organ
    data['index_no'] = index_no
    data['subject'] = subject
    data['legal_status'] = legal_status

    save_data.append({'table': 'policy_latest', 'data': data})
    full_text_data = init_full_text_data(lngid, sub_db_id, down_date_str, fulltext, pub_year)
    save_data.append({'table': 'policy_fulltext_latest', 'data': full_text_data})
    result.save_data = save_data

    # Write attachment info (if any) back to the source article record.
    file_info = get_file_info(data, res, f'({fulltext_xpath})')
    di_model_bef = DealUpdateModel()
    if file_info:
        di_model_bef.update.update({"other_dicts": json.dumps(file_info, ensure_ascii=False)})
    else:
        di_model_bef.update.update({"other_dicts": "{}"})
    di_model_bef.where.update({"rawid": callmodel.sql_model.rawid, "task_tag": callmodel.sql_model.task_tag,
                               "task_name": callmodel.sql_model.task_name})
    result.befor_dicts.update_list.append(di_model_bef)
    return result


#   贵州省住房和城乡建设厅 (Guizhou Provincial Department of Housing and Urban-Rural Development)
def policy_zfcxjstguizhoulist_callback(callmodel: CallBackModel[PolicyListModel]) -> DealModel:
    """List-page callback for the Guizhou Department of Housing and
    Urban-Rural Development (zfcxjst), static-HTML listings.

    On the first page (page_index == 0) the total page count is read from the
    ``createPageHTML(...)`` pagination script and the remaining list pages are
    queued; every article link on the current page is then queued as an
    article task.

    :param callmodel: callback model carrying the fetched list HTML and task info.
    :return: DealModel with queued list pages (befor_dicts) and article tasks
        (next_dicts).
    """
    result = DealModel()
    para_dicts = callmodel.para_dicts
    task_info = callmodel.redis_all.parse_dict["1_1"].task_info
    info_dicts = {"task_name": callmodel.sql_model.task_name,
                  "task_tag": callmodel.sql_model.task_tag,
                  "task_tag_next": task_info.task_tag_next}
    if "1_1" in para_dicts["data"]:
        # Total page count is embedded in the pagination JS call; default to 1.
        max_count = re.findall(r'createPageHTML\((\d+)', para_dicts["data"]["1_1"]['html'])
        max_count = int(max_count[0]) if max_count else 1
        total_page = max_count
        page_index = int(callmodel.sql_model.page_index)
        if page_index == 0:
            # First page: fan out tasks for the remaining list pages.
            sql_dict = callmodel.sql_model.dict()
            di_model_bef = DealInsertModel()
            di_model_bef.insert_pre = CoreSqlValue.insert_ig_it
            sql_dict = deal_sql_dict(sql_dict)
            list_json = json.loads(callmodel.sql_model.list_json)
            for page in range(page_index + 1, total_page):
                sql_dict["page"] = total_page
                sql_dict["page_index"] = page
                dic = {"page_info": f"{list_json['page_info']}_{page}"}
                sql_dict["list_json"] = json.dumps(dic, ensure_ascii=False)
                di_model_bef.lists.append(sql_dict.copy())
            result.befor_dicts.insert.append(di_model_bef)
        di_model_next = DealInsertModel()
        di_model_next.insert_pre = CoreSqlValue.insert_ig_it
        res = Selector(text=para_dicts["data"]["1_1"]['html'])
        li_list = res.xpath('//div[@class="NewsList"]/ul/li')
        for li in li_list:
            temp = info_dicts.copy()
            temp["task_tag"] = temp["task_tag_next"]
            del temp["task_tag_next"]
            article_json = dict()
            href = li.xpath('a/@href').extract_first()
            if not href:
                # Skip list items without a link, mirroring the sibling
                # Guizhou list callbacks ('http' in None would raise).
                continue
            base_url = 'http://zfcxjst.guizhou.gov.cn'
            if 'http' in href:
                url = href
            else:
                url = base_url + href
            if 'htm' not in url:
                continue
            # rawid is the file name of the article URL without its extension.
            rawid = re.findall(r'(.*?)\.', url.split('/')[-1])[0]
            temp["rawid"] = rawid
            temp["sub_db_id"] = '99593'
            article_json["url"] = url
            article_json["title"] = li.xpath('a/text()').extract_first().strip()
            article_json["pub_date"] = li.xpath('span/text()').extract_first().strip()
            temp["article_json"] = json.dumps(article_json, ensure_ascii=False)
            di_model_next.lists.append(temp)
        result.next_dicts.insert.append(di_model_next)

    return result


def policy_zfcxjstguizhoulist1_callback(callmodel: CallBackModel[PolicyListModel]) -> DealModel:
    """List-page callback for the zfcxjst site's JSON-API listings.

    The list response is JSON rather than HTML: the pager's ``pageCount``
    drives fan-out of the remaining pages (on page_index == 1), and each entry
    of ``data.list`` is queued as an article task.

    :param callmodel: callback model carrying the fetched JSON and task info.
    :return: DealModel with queued list pages (befor_dicts) and article tasks
        (next_dicts).
    """
    result = DealModel()
    para_dicts = callmodel.para_dicts
    task_info = callmodel.redis_all.parse_dict["1_1"].task_info
    info_dicts = {"task_name": callmodel.sql_model.task_name,
                  "task_tag": callmodel.sql_model.task_tag,
                  "task_tag_next": task_info.task_tag_next}
    if "1_1" in para_dicts["data"]:
        html_json = json.loads(para_dicts["data"]["1_1"]['html'])
        total_page = html_json['data']['pager']['pageCount']
        page_index = int(callmodel.sql_model.page_index)
        if page_index == 1:
            # First page: fan out tasks for all pages; the page number is
            # carried in page_index, so list_json is reused unchanged.
            sql_dict = callmodel.sql_model.dict()
            di_model_bef = DealInsertModel()
            di_model_bef.insert_pre = CoreSqlValue.insert_ig_it
            sql_dict = deal_sql_dict(sql_dict)
            for page in range(1, total_page + 1):
                sql_dict["page"] = total_page
                sql_dict["page_index"] = page
                sql_dict["list_json"] = callmodel.sql_model.list_json
                di_model_bef.lists.append(sql_dict.copy())
            result.befor_dicts.insert.append(di_model_bef)
        di_model_next = DealInsertModel()
        di_model_next.insert_pre = CoreSqlValue.insert_ig_it

        li_list = html_json['data']['list']
        for li in li_list:
            temp = info_dicts.copy()
            temp["task_tag"] = temp["task_tag_next"]
            del temp["task_tag_next"]
            article_json = dict()
            href = li['doc_pub_url']
            base_url = 'http://zfcxjst.guizhou.gov.cn'
            if 'http' in href:
                url = href
            else:
                url = base_url + href
            if 'htm' not in url:
                continue
            # rawid is the file name of the article URL without its extension.
            rawid = re.findall(r'(.*?)\.', url.split('/')[-1])[0]
            temp["rawid"] = rawid
            temp["sub_db_id"] = '99593'
            article_json["url"] = url
            article_json["title"] = li['f_202163974476']
            article_json["pub_date"] = li['save_time']
            temp["article_json"] = json.dumps(article_json, ensure_ascii=False)
            di_model_next.lists.append(temp)
        result.next_dicts.insert.append(di_model_next)

    return result


def policy_zfcxjstguizhouarticle_callback(callmodel: CallBackModel[PolicyArticleModel]) -> DealModel:
    """Article-fetch callback for the zfcxjst site.

    Parsing happens in the ETL callback, so this simply returns an empty
    DealModel.
    """
    return DealModel()


def policy_zfcxjstguizhouarticle_etl_callback(callmodel) -> EtlDealModel:
    """ETL callback for zfcxjst (Housing and Urban-Rural Development) article pages.

    Parses the article HTML into metadata fields (title, document number,
    index number, subject, written/implementation/invalidation dates, legal
    status, issuing organ), builds rows for ``policy_latest`` and
    ``policy_fulltext_latest``, and queues an update writing attachment info
    back to the source record.

    :param callmodel: callback model; ``para_dicts['data']['1_1']['html']``
        holds the page HTML and ``sql_model.article_json`` the metadata
        captured on the list page.
    :return: populated EtlDealModel.
    :raises Exception: when no full-text node is found in the page.
    """
    result = EtlDealModel()
    save_data = list()

    html = callmodel.para_dicts['data']['1_1']['html']
    article_json = json.loads(callmodel.sql_model.article_json)
    provider_url = article_json['url']
    pub_date = clean_pubdate(article_json['pub_date'])
    pub_year = pub_date[:4]
    res = Selector(text=html)

    # Prefer the on-page title; fall back to the title captured on the list page.
    title = ''.join(res.xpath('//div[@class="title"]//h1//text()|//div[contains(@class,"DocTitle")]//text()').extract()).strip()
    if not title:
        title = article_json['title'].strip()
    if 'xxgk_xl_top' in html:
        # Page template A: metadata in the xxgk_xl_top header plus inline JS variables.
        pub_no = ''.join(res.xpath('//div[@class="xxgk_xl_top"]//a[contains(text(),"文号:")]/following::span[1]//text()').extract()).strip()
        index_no = ''.join(res.xpath('//div[@class="xxgk_xl_top"]//a[contains(text(),"索引号:")]/following::span[1]//text()').extract()).strip()
        subject = ''.join(res.xpath('//div[@class="xxgk_xl_top"]//a[contains(text(),"信息分类:")]/following::span[1]//text()').extract()).strip()
        written_date = ''
        impl_date = ''
        invalid_date = ''
        legal_status = re.findall(r"var  isok='(.*?)'", html)
        legal_status = '有效' if legal_status and legal_status[0].strip() == '是' else ''
        organ = re.findall(r'var str = "(.*?)"', html)
        if not organ:
            organ = re.findall(r'var str_1 = "(.*?)"', html)
        organ = organ[0].strip() if organ else ''
    else:
        # Page template B: metadata in the XxgkTypeBox markup.
        pub_no = ''.join(res.xpath('//div[@class="XxgkTypeBox Box"]//td[contains(text(),"发文字号：")]/following::td[1]//text()').extract()).strip()
        index_no = ''
        subject = ''.join(res.xpath('//div[@class="XxgkTypeBox Box"]//td[contains(text(),"所属领域：")]/following::td[1]//text()').extract()).strip()
        written_date = ''.join(res.xpath('//div[@class="XxgkTypeBox Box"]//td[contains(text(),"成文日期：")]/following::td[1]//text()').extract()).strip()
        impl_date = ''.join(res.xpath('//div[@class="XxgkTypeBox Box"]//td[contains(text(),"施行日期：")]/following::td[1]//text()').extract()).strip()
        invalid_date = re.findall(r"let FZRQ = '(.*?)'", html)
        invalid_date = invalid_date[0].strip() if invalid_date else ''
        legal_status = ''.join(res.xpath('//div[@class="XxgkTypeBox Box"]//td[contains(text(),"文件状态：")]/following::td[1]//text()').extract()).strip()
        organ = ''.join(res.xpath('//div[@class="XxgkTypeBox Box"]//td[contains(text(),"发文机关：")]/following::td[1]//text()').extract()).strip()
    if organ.startswith('省'):
        # Provincial organs appear on-site without the province prefix.
        organ = '贵州' + organ

    fulltext_xpath = '//div[@id="Zoom"]|//font[@id="Zoom"]'
    fulltext = res.xpath(fulltext_xpath).extract_first()
    if not fulltext:
        raise Exception(f'fulltext node not found: {provider_url}')

    down_date_str = time.strftime('%Y%m%d_%H%M%S', time.localtime())
    sub_db_id = '99593'
    rawid = callmodel.sql_model.rawid
    lngid = BaseLngid().GetLngid(sub_db_id, rawid)
    product = "ZFCXJSTGUIZHOU"
    zt_provider = "zfcxjstguizhougovpolicy"
    data = init_data(rawid, lngid, sub_db_id, down_date_str, product, zt_provider)

    data['title'] = title
    data['provider_url'] = provider_url
    data['pub_date'] = pub_date
    data['pub_year'] = pub_year
    data['pub_no'] = pub_no
    data['organ'] = organ
    data['index_no'] = index_no
    data['written_date'] = clean_pubdate(written_date)
    data['impl_date'] = clean_pubdate(impl_date)
    data['invalid_date'] = clean_pubdate(invalid_date)
    data['subject'] = subject
    data['legal_status'] = legal_status

    save_data.append({'table': 'policy_latest', 'data': data})
    full_text_data = init_full_text_data(lngid, sub_db_id, down_date_str, fulltext, pub_year)
    save_data.append({'table': 'policy_fulltext_latest', 'data': full_text_data})
    result.save_data = save_data

    # Write attachment info (if any) back to the source article record.
    file_info = get_file_info(data, res, f'({fulltext_xpath})')
    di_model_bef = DealUpdateModel()
    if file_info:
        di_model_bef.update.update({"other_dicts": json.dumps(file_info, ensure_ascii=False)})
    else:
        di_model_bef.update.update({"other_dicts": "{}"})
    di_model_bef.where.update({"rawid": callmodel.sql_model.rawid, "task_tag": callmodel.sql_model.task_tag,
                               "task_name": callmodel.sql_model.task_name})
    result.befor_dicts.update_list.append(di_model_bef)
    return result


#   贵州省卫生健康委员会 (Guizhou Provincial Health Commission)
def policy_wjwguizhoulist_callback(callmodel: CallBackModel[PolicyListModel]) -> DealModel:
    """List-page callback for wjw.guizhou.gov.cn (provincial Health Commission).

    On the first page (page_index == 0) it fans out crawl tasks for the
    remaining list pages, then extracts every article link on the current
    page and queues it for the article stage under ``task_tag_next``.

    :param callmodel: carries the fetched page (``para_dicts``), the source
        SQL row (``sql_model``) and the redis task config (``redis_all``).
    :return: DealModel with the rows to insert for this and the next stage.
    """
    result = DealModel()
    para_dicts = callmodel.para_dicts
    task_info = callmodel.redis_all.parse_dict["1_1"].task_info
    info_dicts = {"task_name": callmodel.sql_model.task_name,
                  "task_tag": callmodel.sql_model.task_tag,
                  "task_tag_next": task_info.task_tag_next}
    if "1_1" in para_dicts["data"]:
        # total page count is embedded in a createPageHTML(...) JS call
        max_count = re.findall(r'createPageHTML\((\d+)', para_dicts["data"]["1_1"]['html'])
        total_page = int(max_count[0]) if max_count else 1
        page_index = int(callmodel.sql_model.page_index)
        if page_index == 0:
            # first page only: schedule pages 1 .. total_page-1
            # (page 0 is index.html, which is the page being parsed now)
            sql_dict = callmodel.sql_model.dict()
            di_model_bef = DealInsertModel()
            di_model_bef.insert_pre = CoreSqlValue.insert_ig_it
            sql_dict = deal_sql_dict(sql_dict)
            list_json = json.loads(callmodel.sql_model.list_json)
            for page in range(page_index + 1, total_page):
                sql_dict["page"] = total_page
                sql_dict["page_index"] = page
                dic = {"page_info": f"{list_json['page_info']}_{page}"}
                sql_dict["list_json"] = json.dumps(dic, ensure_ascii=False)
                di_model_bef.lists.append(sql_dict.copy())
            result.befor_dicts.insert.append(di_model_bef)
        di_model_next = DealInsertModel()
        di_model_next.insert_pre = CoreSqlValue.insert_ig_it
        res = Selector(text=para_dicts["data"]["1_1"]['html'])
        li_list = res.xpath('//ul[@class="NewsList"]/li')
        for li in li_list:
            temp = info_dicts.copy()
            # queue the article under the next stage's tag
            temp["task_tag"] = temp["task_tag_next"]
            del temp["task_tag_next"]
            article_json = dict()
            href = li.xpath('a/@href').extract_first()
            if not href:
                continue
            base_url = f'http://wjw.guizhou.gov.cn/{callmodel.sql_model.list_rawid}/index.html'
            url = parse.urljoin(base_url, href)
            if 'htm' not in url:
                continue
            # rawid = detail-page file name without its extension
            rawid = re.findall(r'(.*?)\.', url.split('/')[-1])[0]
            temp["rawid"] = rawid
            temp["sub_db_id"] = '99594'
            article_json["url"] = url
            article_json["title"] = li.xpath('a/text()').extract_first().strip()
            article_json["pub_date"] = li.xpath('span/text()').extract_first().strip()
            temp["article_json"] = json.dumps(article_json, ensure_ascii=False)
            di_model_next.lists.append(temp)
        result.next_dicts.insert.append(di_model_next)

    return result


def policy_wjwguizhoulist1_callback(callmodel: CallBackModel[PolicyListModel]) -> DealModel:
    """List callback for the Health Commission's JSON list endpoint.

    The response body is JSON: pagination lives in ``data.pager.pageCount``
    and the entries in ``data.list``. On the first page (page_index == 1)
    tasks for all pages are scheduled; every entry is then queued for the
    article stage.

    :param callmodel: fetched payload, source SQL row and redis task config.
    :return: DealModel with the rows to insert for this and the next stage.
    """
    result = DealModel()
    para_dicts = callmodel.para_dicts
    task_info = callmodel.redis_all.parse_dict["1_1"].task_info
    info_dicts = {"task_name": callmodel.sql_model.task_name,
                  "task_tag": callmodel.sql_model.task_tag,
                  "task_tag_next": task_info.task_tag_next}
    if "1_1" in para_dicts["data"]:
        html_json = json.loads(para_dicts["data"]["1_1"]['html'])
        total_page = html_json['data']['pager']['pageCount']
        page_index = int(callmodel.sql_model.page_index)
        if page_index == 1:
            sql_dict = callmodel.sql_model.dict()
            di_model_bef = DealInsertModel()
            di_model_bef.insert_pre = CoreSqlValue.insert_ig_it
            sql_dict = deal_sql_dict(sql_dict)
            for page in range(1, total_page + 1):
                sql_dict["page"] = total_page
                sql_dict["page_index"] = page
                # the request body is identical for every page; the page
                # number is carried by page_index
                sql_dict["list_json"] = callmodel.sql_model.list_json
                di_model_bef.lists.append(sql_dict.copy())
            result.befor_dicts.insert.append(di_model_bef)
        di_model_next = DealInsertModel()
        di_model_next.insert_pre = CoreSqlValue.insert_ig_it

        li_list = html_json['data']['list']
        for li in li_list:
            temp = info_dicts.copy()
            temp["task_tag"] = temp["task_tag_next"]
            del temp["task_tag_next"]
            article_json = dict()
            href = li['doc_pub_url']
            base_url = 'http://wjw.guizhou.gov.cn/zwgk_500663/gzhgfxwjsjk/gfxwjsjk/index.html'
            url = parse.urljoin(base_url, href)
            if 'htm' not in url:
                continue
            # rawid = detail-page file name without its extension
            rawid = re.findall(r'(.*?)\.', url.split('/')[-1])[0]
            temp["rawid"] = rawid
            temp["sub_db_id"] = '99594'
            article_json["url"] = url
            article_json["title"] = li['f_202163974476']
            article_json["pub_date"] = li['save_time']
            temp["article_json"] = json.dumps(article_json, ensure_ascii=False)
            di_model_next.lists.append(temp)
        result.next_dicts.insert.append(di_model_next)

    return result


def policy_wjwguizhouarticle_callback(callmodel: CallBackModel[PolicyArticleModel]) -> DealModel:
    """Article-stage callback for the Health Commission: no parsing is done
    here (extraction happens in the ETL callback), so return an empty result."""
    return DealModel()


def policy_wjwguizhouarticle_etl_callback(callmodel) -> EtlDealModel:
    """ETL callback for Health Commission article detail pages.

    Extracts policy metadata (title, document number, issuing organ, dates,
    legal status, ...) from the downloaded HTML, builds rows for the
    ``policy_latest`` and ``policy_fulltext_latest`` tables, and writes
    attachment info back onto the source row via an update.

    Raises a bare ``Exception`` when no fulltext container is found so the
    task is marked as failed upstream.
    """
    result = EtlDealModel()
    save_data = list()

    html = callmodel.para_dicts['data']['1_1']['html']
    article_json = json.loads(callmodel.sql_model.article_json)
    title = article_json['title']
    provider_url = article_json['url']
    pub_date = clean_pubdate(article_json['pub_date'])
    pub_year = pub_date[:4]
    res = Selector(text=html)

    # prefer the title rendered on the page; fall back to the list-page title
    title = ''.join(res.xpath('//div[@class="zwxxgk_ndbgwz"]//h1//text()|//div[@class="title"]//h1//text()|//div[contains(@class,"DocTitle")]//text()').extract()).strip()
    if not title:
        title = article_json['title'].strip()

    # the site serves (at least) three detail-page templates; choose the
    # extraction strategy by sniffing a marker string in the raw HTML
    if 'xxgk_xl_top' in html:
        pub_no = ''.join(res.xpath('//div[@class="xxgk_xl_top"]//a[contains(text(),"文号:")]/following::span[1]//text()').extract()).strip()
        index_no = ''.join(res.xpath('//div[@class="xxgk_xl_top"]//a[contains(text(),"索引号:")]/following::span[1]//text()').extract()).strip()
        subject = ''.join(res.xpath('//div[@class="xxgk_xl_top"]//a[contains(text(),"信息分类:")]/following::span[1]//text()').extract()).strip()
        written_date = ''
        impl_date = ''
        invalid_date = ''
        # validity flag and issuing organ are embedded in inline JS variables
        legal_status = re.findall("var  isok='(.*?)'", html)
        legal_status = '有效' if legal_status and legal_status[0].strip() == '是' else ''
        organ = re.findall('var str = "(.*?)"', html)
        if not organ:
            organ = re.findall('var str_1 = "(.*?)"', html)
        organ = organ[0].strip() if organ else ''
    elif 'article_ys' in html:
        pub_no = re.findall("var wh='(.*?)'", html)
        pub_no = pub_no[0].strip() if pub_no else ''
        index_no = ''.join(res.xpath('//div[@class="article_ys"]//td[contains(text(),"索引号")]/following::td[1]//text()').extract()).strip()
        subject = ''.join(res.xpath('//div[@class="article_ys"]//td[contains(text(),"信息分类")]/following::td[1]//text()').extract()).strip()
        written_date = ''
        impl_date = ''
        invalid_date = ''
        # this template encodes validity differently: '0' / '否' mean invalid
        legal_status = re.findall("var  isok='(.*?)'", html)
        legal_status = '有效' if legal_status and legal_status[0].strip() != '0' and legal_status[0].strip() != '否' else ''
        organ = re.findall("var fbjg='(.*?)'", html)
        organ = organ[0].strip() if organ else ''
    else:
        # default template: metadata laid out in a "XxgkTypeBox Box" table
        pub_no = ''.join(res.xpath('//div[@class="XxgkTypeBox Box"]//td[contains(text(),"发文字号：")]/following::td[1]//text()').extract()).strip()
        index_no = ''
        subject = ''.join(res.xpath('//div[@class="XxgkTypeBox Box"]//td[contains(text(),"所属领域：")]/following::td[1]//text()').extract()).strip()
        written_date = ''.join(res.xpath('//div[@class="XxgkTypeBox Box"]//td[contains(text(),"成文日期：")]/following::td[1]//text()').extract()).strip()
        impl_date = ''.join(res.xpath('//div[@class="XxgkTypeBox Box"]//td[contains(text(),"施行日期：")]/following::td[1]//text()').extract()).strip()
        invalid_date = re.findall("let FZRQ = '(.*?)'", html)
        invalid_date = invalid_date[0].strip() if invalid_date else ''
        legal_status = ''.join(res.xpath('//div[@class="XxgkTypeBox Box"]//td[contains(text(),"文件状态：")]/following::td[1]//text()').extract()).strip()
        organ = ''.join(res.xpath('//div[@class="XxgkTypeBox Box"]//td[contains(text(),"发文机关：")]/following::td[1]//text()').extract()).strip()
    # organ names starting with 省 ("province") lack the province name; prefix it
    if organ.startswith('省'):
        organ = '贵州' + organ

    fulltext_xpath = '//div[@id="Zoom"]|//font[@id="Zoom"]'
    fulltext = res.xpath(fulltext_xpath).extract_first()
    if not fulltext:
        raise Exception

    down_date_str = time.strftime('%Y%m%d_%H%M%S', time.localtime())
    sub_db_id = '99594'
    rawid = callmodel.sql_model.rawid
    lngid = BaseLngid().GetLngid(sub_db_id, rawid)
    product = "WJWGUIZHOU"
    zt_provider = "wjwguizhougovpolicy"
    data = init_data(rawid, lngid, sub_db_id, down_date_str, product, zt_provider)
    print(lngid)

    data['title'] = title
    data['provider_url'] = provider_url
    data['pub_date'] = pub_date
    data['pub_year'] = pub_year
    data['pub_no'] = pub_no
    data['organ'] = organ
    data['index_no'] = index_no
    data['written_date'] = clean_pubdate(written_date)
    data['impl_date'] = clean_pubdate(impl_date)
    data['invalid_date'] = clean_pubdate(invalid_date)
    data['subject'] = subject
    # data['subject_word'] = subject_word
    data['legal_status'] = legal_status

    save_data.append({'table': 'policy_latest', 'data': data})
    full_text_data = init_full_text_data(lngid, sub_db_id, down_date_str, fulltext, pub_year)
    save_data.append({'table': 'policy_fulltext_latest', 'data': full_text_data})
    result.save_data = save_data

    # record attachment info (or an empty dict) back onto the source row
    file_info = get_file_info(data, res, f'({fulltext_xpath})')
    di_model_bef = DealUpdateModel()
    if file_info:
        di_model_bef.update.update({"other_dicts": json.dumps(file_info, ensure_ascii=False)})
    else:
        di_model_bef.update.update({"other_dicts": "{}"})
    di_model_bef.where.update({"rawid": callmodel.sql_model.rawid, "task_tag": callmodel.sql_model.task_tag,
                               "task_name": callmodel.sql_model.task_name})
    result.befor_dicts.update_list.append(di_model_bef)
    return result


#   贵州省贵阳市 (Guiyang City, Guizhou Province)
def policy_guiyanglist_callback(callmodel: CallBackModel[PolicyListModel]) -> DealModel:
    """List-page callback for www.guiyang.gov.cn (Guiyang municipal site).

    On the first page (page_index == 0) it fans out crawl tasks for the
    remaining list pages, then extracts every article link on the current
    page and queues it for the article stage.

    :param callmodel: fetched page, source SQL row and redis task config.
    :return: DealModel with the rows to insert for this and the next stage.
    """
    result = DealModel()
    para_dicts = callmodel.para_dicts
    task_info = callmodel.redis_all.parse_dict["1_1"].task_info
    info_dicts = {"task_name": callmodel.sql_model.task_name,
                  "task_tag": callmodel.sql_model.task_tag,
                  "task_tag_next": task_info.task_tag_next}
    if "1_1" in para_dicts["data"]:
        # total page count is embedded in a "countPage = N" JS assignment
        max_count = re.findall(r'countPage = (\d+)', para_dicts["data"]["1_1"]['html'])
        total_page = int(max_count[0]) if max_count else 1
        page_index = int(callmodel.sql_model.page_index)
        if page_index == 0:
            # first page only: schedule pages 1 .. total_page-1
            sql_dict = callmodel.sql_model.dict()
            di_model_bef = DealInsertModel()
            di_model_bef.insert_pre = CoreSqlValue.insert_ig_it
            sql_dict = deal_sql_dict(sql_dict)
            list_json = json.loads(callmodel.sql_model.list_json)
            for page in range(page_index + 1, total_page):
                sql_dict["page"] = total_page
                sql_dict["page_index"] = page
                dic = {"page_info": f"{list_json['page_info']}_{page}"}
                sql_dict["list_json"] = json.dumps(dic, ensure_ascii=False)
                di_model_bef.lists.append(sql_dict.copy())
            result.befor_dicts.insert.append(di_model_bef)
        di_model_next = DealInsertModel()
        di_model_next.insert_pre = CoreSqlValue.insert_ig_it
        res = Selector(text=para_dicts["data"]["1_1"]['html'])
        # the channels use several list layouts; match them all
        li_list = res.xpath('//ul[@class="main_3222 pr"]/li|//div[@class="zfxxgk_zdgkc"]/ul/li|//div[@class="right-content"]/ul/li')
        for li in li_list:
            temp = info_dicts.copy()
            temp["task_tag"] = temp["task_tag_next"]
            del temp["task_tag_next"]
            article_json = dict()
            href = li.xpath('a/@href').extract_first()
            if not href:
                # separator/advert rows carry no link; skip them
                continue
            base_url = f'https://www.guiyang.gov.cn/{callmodel.sql_model.list_rawid}/index_1.html'
            url = parse.urljoin(base_url, href)
            if 'htm' not in url:
                continue
            # rawid = detail-page file name without its extension
            rawid = re.findall(r'(.*?)\.', url.split('/')[-1])[0]
            temp["rawid"] = rawid
            temp["sub_db_id"] = '99595'
            article_json["url"] = url
            article_json["title"] = li.xpath('a/@title').extract_first().strip()
            article_json["pub_date"] = li.xpath('span/text()|b/text()|a/div/text()').extract_first().strip()
            temp["article_json"] = json.dumps(article_json, ensure_ascii=False)
            di_model_next.lists.append(temp)
        result.next_dicts.insert.append(di_model_next)

    return result


def policy_guiyanglist1_callback(callmodel: CallBackModel[PolicyListModel]) -> DealModel:
    """List callback for Guiyang's JSON list endpoint.

    Pagination is taken from ``data.pager.pageCount`` and entries from
    ``data.list``. On the first page (page_index == 1) tasks for all pages
    are scheduled; every entry is then queued for the article stage.

    :param callmodel: fetched payload, source SQL row and redis task config.
    :return: DealModel with the rows to insert for this and the next stage.
    """
    result = DealModel()
    para_dicts = callmodel.para_dicts
    task_info = callmodel.redis_all.parse_dict["1_1"].task_info
    info_dicts = {"task_name": callmodel.sql_model.task_name,
                  "task_tag": callmodel.sql_model.task_tag,
                  "task_tag_next": task_info.task_tag_next}
    if "1_1" in para_dicts["data"]:
        html_json = json.loads(para_dicts["data"]["1_1"]['html'])
        total_page = html_json['data']['pager']['pageCount']
        page_index = int(callmodel.sql_model.page_index)
        if page_index == 1:
            sql_dict = callmodel.sql_model.dict()
            di_model_bef = DealInsertModel()
            di_model_bef.insert_pre = CoreSqlValue.insert_ig_it
            sql_dict = deal_sql_dict(sql_dict)
            for page in range(1, total_page + 1):
                sql_dict["page"] = total_page
                sql_dict["page_index"] = page
                # request body is identical per page; page number rides in page_index
                sql_dict["list_json"] = callmodel.sql_model.list_json
                di_model_bef.lists.append(sql_dict.copy())
            result.befor_dicts.insert.append(di_model_bef)
        di_model_next = DealInsertModel()
        di_model_next.insert_pre = CoreSqlValue.insert_ig_it

        li_list = html_json['data']['list']
        for li in li_list:
            temp = info_dicts.copy()
            temp["task_tag"] = temp["task_tag_next"]
            del temp["task_tag_next"]
            article_json = dict()
            href = li['doc_pub_url']
            base_url = 'https://www.guiyang.gov.cn'
            # doc_pub_url may be absolute or site-relative
            if 'http' in href:
                url = href
            else:
                url = base_url + href
            if 'htm' not in url:
                continue
            # rawid = detail-page file name without its extension
            rawid = re.findall(r'(.*?)\.', url.split('/')[-1])[0]
            temp["rawid"] = rawid
            temp["sub_db_id"] = '99595'
            article_json["url"] = url
            article_json["title"] = li['f_202163974476']
            article_json["pub_date"] = li['save_time']
            temp["article_json"] = json.dumps(article_json, ensure_ascii=False)
            di_model_next.lists.append(temp)
        result.next_dicts.insert.append(di_model_next)

    return result


def policy_guiyanglist2_callback(callmodel: CallBackModel[PolicyListModel]) -> DealModel:
    """List callback for Guiyang's search-API endpoint.

    The payload is JSON with a hit count in ``data.search.totalHits``
    (10 hits per page) and entries in ``data.search.searchs``. On the
    first page (page_index == 1) tasks for all pages are scheduled; every
    entry is then queued for the article stage.

    :param callmodel: fetched payload, source SQL row and redis task config.
    :return: DealModel with the rows to insert for this and the next stage.
    """
    result = DealModel()
    para_dicts = callmodel.para_dicts
    task_info = callmodel.redis_all.parse_dict["1_1"].task_info
    info_dicts = {"task_name": callmodel.sql_model.task_name,
                  "task_tag": callmodel.sql_model.task_tag,
                  "task_tag_next": task_info.task_tag_next}
    if "1_1" in para_dicts["data"]:
        html_json = json.loads(para_dicts["data"]["1_1"]['html'])
        tcount = html_json['data']['search']['totalHits']
        # 10 results per page
        total_page = math.ceil(int(tcount) / 10)
        page_index = int(callmodel.sql_model.page_index)
        if page_index == 1:
            sql_dict = callmodel.sql_model.dict()
            di_model_bef = DealInsertModel()
            di_model_bef.insert_pre = CoreSqlValue.insert_ig_it
            sql_dict = deal_sql_dict(sql_dict)
            for page in range(1, total_page + 1):
                sql_dict["page"] = total_page
                sql_dict["page_index"] = page
                # request body is identical per page; page number rides in page_index
                sql_dict["list_json"] = callmodel.sql_model.list_json
                di_model_bef.lists.append(sql_dict.copy())
            result.befor_dicts.insert.append(di_model_bef)
        di_model_next = DealInsertModel()
        di_model_next.insert_pre = CoreSqlValue.insert_ig_it

        li_list = html_json['data']['search']['searchs']
        for li in li_list:
            temp = info_dicts.copy()
            temp["task_tag"] = temp["task_tag_next"]
            del temp["task_tag_next"]
            article_json = dict()
            href = li['viewUrl']
            base_url = 'https://www.guiyang.gov.cn'
            # viewUrl may be absolute or site-relative
            if 'http' in href:
                url = href
            else:
                url = base_url + href
            if 'htm' not in url:
                continue
            # rawid = detail-page file name without its extension
            rawid = re.findall(r'(.*?)\.', url.split('/')[-1])[0]
            temp["rawid"] = rawid
            temp["sub_db_id"] = '99595'
            article_json["url"] = url
            article_json["title"] = li['title']
            article_json["pub_date"] = li['docDate']
            temp["article_json"] = json.dumps(article_json, ensure_ascii=False)
            di_model_next.lists.append(temp)
        result.next_dicts.insert.append(di_model_next)

    return result


def policy_guiyangarticle_callback(callmodel: CallBackModel[PolicyArticleModel]) -> DealModel:
    """Article-stage callback for Guiyang: parsing happens in the ETL
    callback, so this stage just returns an empty result."""
    return DealModel()


def policy_guiyangarticle_etl_callback(callmodel) -> EtlDealModel:
    """ETL callback for Guiyang article detail pages.

    Extracts policy metadata from the downloaded HTML, builds rows for the
    ``policy_latest`` and ``policy_fulltext_latest`` tables, and records
    attachment info back onto the source row via an update.

    Raises a bare ``Exception`` when no fulltext container is found so the
    task is marked as failed upstream.
    """
    result = EtlDealModel()
    save_data = list()

    html = callmodel.para_dicts['data']['1_1']['html']
    article_json = json.loads(callmodel.sql_model.article_json)
    title = article_json['title']
    provider_url = article_json['url']
    pub_date = clean_pubdate(article_json['pub_date'])
    pub_year = pub_date[:4]
    res = Selector(text=html)

    # prefer the title rendered on the page; fall back to the list-page title
    title = ''.join(res.xpath('//div[@class="main_321"]//h4//text()|//div[@class="zwxxgk_ndbgwz"]//h1//text()|//div[contains(@class,"DocTitle")]//text()').extract()).strip()
    if not title:
        title = article_json['title'].strip()
    # two known detail-page templates, distinguished by a marker string
    if 'tablec' in html:
        pub_no = ''.join(res.xpath('//table[@class="tablec"]//td[contains(text(),"文 号：")]/following::td[1]//text()').extract()).strip()
        subject = ''
        written_date = ''.join(res.xpath('//table[@class="tablec"]//td[contains(text(),"成文时间：")]/following::td[1]//text()').extract()).strip()
        impl_date = ''
        legal_status = ''.join(res.xpath('//table[@class="tablec"]//td[contains(text(),"有效性：")]/following::td[1]//text()').extract()).strip()
        legal_status = '有效' if legal_status == '是' else ''
        organ = ''.join(res.xpath('//table[@class="tablec"]//td[contains(text(),"发布机构：")]/following::td[1]//text()').extract()).strip()
    else:
        # default template: metadata laid out in a "XxgkTypeBox Box" table
        pub_no = ''.join(res.xpath('//div[@class="XxgkTypeBox Box"]//td[contains(text(),"发文字号：")]/following::td[1]//text()').extract()).strip()
        subject = ''.join(res.xpath('//div[@class="XxgkTypeBox Box"]//td[contains(text(),"所属领域：")]/following::td[1]//text()').extract()).strip()
        written_date = ''
        impl_date = ''.join(res.xpath('//div[@class="XxgkTypeBox Box"]//td[contains(text(),"施行日期：")]/following::td[1]//text()').extract()).strip()
        legal_status = ''.join(res.xpath('//div[@class="XxgkTypeBox Box"]//td[contains(text(),"文件状态：")]/following::td[1]//text()').extract()).strip()
        organ = ''.join(res.xpath('//div[@class="XxgkTypeBox Box"]//td[contains(text(),"制定机关：")]/following::td[1]//text()').extract()).strip()
    # organ names starting with 市 ("city") lack the city name; prefix it
    if organ.startswith('市'):
        organ = '贵阳' + organ

    fulltext_xpath = '//div[@id="Zoom"]|//div[contains(@class,"view")]|//div[contains(@class,"main_3211")]'
    fulltext = res.xpath(fulltext_xpath).extract_first()
    if not fulltext:
        raise Exception

    down_date_str = time.strftime('%Y%m%d_%H%M%S', time.localtime())
    sub_db_id = '99595'
    rawid = callmodel.sql_model.rawid
    lngid = BaseLngid().GetLngid(sub_db_id, rawid)
    product = "GUIYANG"
    zt_provider = "guiyanggovpolicy"
    data = init_data(rawid, lngid, sub_db_id, down_date_str, product, zt_provider)
    print(lngid)

    data['title'] = title
    data['provider_url'] = provider_url
    data['pub_date'] = pub_date
    data['pub_year'] = pub_year
    data['pub_no'] = pub_no
    data['organ'] = organ
    # data['index_no'] = index_no
    data['written_date'] = clean_pubdate(written_date)
    data['impl_date'] = clean_pubdate(impl_date)
    # data['invalid_date'] = clean_pubdate(invalid_date)
    data['subject'] = subject
    # data['subject_word'] = subject_word
    data['legal_status'] = legal_status

    save_data.append({'table': 'policy_latest', 'data': data})
    full_text_data = init_full_text_data(lngid, sub_db_id, down_date_str, fulltext, pub_year)
    save_data.append({'table': 'policy_fulltext_latest', 'data': full_text_data})
    result.save_data = save_data

    # record attachment info (or an empty dict) back onto the source row
    file_info = get_file_info(data, res, f'({fulltext_xpath})')
    di_model_bef = DealUpdateModel()
    if file_info:
        di_model_bef.update.update({"other_dicts": json.dumps(file_info, ensure_ascii=False)})
    else:
        di_model_bef.update.update({"other_dicts": "{}"})
    di_model_bef.where.update({"rawid": callmodel.sql_model.rawid, "task_tag": callmodel.sql_model.task_tag,
                               "task_name": callmodel.sql_model.task_name})
    result.befor_dicts.update_list.append(di_model_bef)
    return result


#   贵州省遵义市 (Zunyi City, Guizhou Province)
def policy_zunyilist_callback(callmodel: CallBackModel[PolicyListModel]) -> DealModel:
    """List-page callback for www.zunyi.gov.cn (Zunyi municipal site).

    On the first page (page_index == 0) it fans out crawl tasks for the
    remaining list pages, then extracts every article link on the current
    page and queues it for the article stage.

    :param callmodel: fetched page, source SQL row and redis task config.
    :return: DealModel with the rows to insert for this and the next stage.
    """
    result = DealModel()
    para_dicts = callmodel.para_dicts
    task_info = callmodel.redis_all.parse_dict["1_1"].task_info
    info_dicts = {"task_name": callmodel.sql_model.task_name,
                  "task_tag": callmodel.sql_model.task_tag,
                  "task_tag_next": task_info.task_tag_next}
    if "1_1" in para_dicts["data"]:
        # total page count is embedded in a createPageHTML(...) JS call
        max_count = re.findall(r'createPageHTML\((\d+)', para_dicts["data"]["1_1"]['html'])
        total_page = int(max_count[0]) if max_count else 1
        page_index = int(callmodel.sql_model.page_index)
        if page_index == 0:
            # first page only: schedule pages 1 .. total_page-1
            sql_dict = callmodel.sql_model.dict()
            di_model_bef = DealInsertModel()
            di_model_bef.insert_pre = CoreSqlValue.insert_ig_it
            sql_dict = deal_sql_dict(sql_dict)
            list_json = json.loads(callmodel.sql_model.list_json)
            for page in range(page_index + 1, total_page):
                sql_dict["page"] = total_page
                sql_dict["page_index"] = page
                dic = {"page_info": f"{list_json['page_info']}_{page}"}
                sql_dict["list_json"] = json.dumps(dic, ensure_ascii=False)
                di_model_bef.lists.append(sql_dict.copy())
            result.befor_dicts.insert.append(di_model_bef)
        di_model_next = DealInsertModel()
        di_model_next.insert_pre = CoreSqlValue.insert_ig_it
        res = Selector(text=para_dicts["data"]["1_1"]['html'])
        # the channels use several list layouts; match them all
        li_list = res.xpath('//ul[@class="NewsList"]/li|//div[@class="zcjd_list"]/ul/li|//div[@class="zfxxgk_zdgkc"]/ul/li')
        for li in li_list:
            temp = info_dicts.copy()
            temp["task_tag"] = temp["task_tag_next"]
            del temp["task_tag_next"]
            article_json = dict()
            href = li.xpath('h2/a/@href|a/@href').extract_first()
            if not href:
                continue
            base_url = f'https://www.zunyi.gov.cn/{callmodel.sql_model.list_rawid}/index.html'
            url = parse.urljoin(base_url, href)
            if 'htm' not in url:
                continue
            # rawid = detail-page file name without its extension
            rawid = re.findall(r'(.*?)\.', url.split('/')[-1])[0]
            temp["rawid"] = rawid
            temp["sub_db_id"] = '99596'
            article_json["url"] = url
            article_json["title"] = li.xpath('h2/a/@title|a/@title').extract_first().strip()
            article_json["pub_date"] = li.xpath('h2/span/text()|span/text()|b/text()').extract_first().strip()
            temp["article_json"] = json.dumps(article_json, ensure_ascii=False)
            di_model_next.lists.append(temp)
        result.next_dicts.insert.append(di_model_next)

    return result


def policy_zunyilist1_callback(callmodel: CallBackModel[PolicyListModel]) -> DealModel:
    """List callback for Zunyi's JSON list endpoint.

    Pagination is taken from ``data.pager.pageCount`` and entries from
    ``data.list``. On the first page (page_index == 1) tasks for all pages
    are scheduled; every entry is then queued for the article stage.

    :param callmodel: fetched payload, source SQL row and redis task config.
    :return: DealModel with the rows to insert for this and the next stage.
    """
    result = DealModel()
    para_dicts = callmodel.para_dicts
    task_info = callmodel.redis_all.parse_dict["1_1"].task_info
    info_dicts = {"task_name": callmodel.sql_model.task_name,
                  "task_tag": callmodel.sql_model.task_tag,
                  "task_tag_next": task_info.task_tag_next}
    if "1_1" in para_dicts["data"]:
        html_json = json.loads(para_dicts["data"]["1_1"]['html'])
        total_page = html_json['data']['pager']['pageCount']
        page_index = int(callmodel.sql_model.page_index)
        if page_index == 1:
            sql_dict = callmodel.sql_model.dict()
            di_model_bef = DealInsertModel()
            di_model_bef.insert_pre = CoreSqlValue.insert_ig_it
            sql_dict = deal_sql_dict(sql_dict)
            for page in range(1, total_page + 1):
                sql_dict["page"] = total_page
                sql_dict["page_index"] = page
                # request body is identical per page; page number rides in page_index
                sql_dict["list_json"] = callmodel.sql_model.list_json
                di_model_bef.lists.append(sql_dict.copy())
            result.befor_dicts.insert.append(di_model_bef)
        di_model_next = DealInsertModel()
        di_model_next.insert_pre = CoreSqlValue.insert_ig_it

        li_list = html_json['data']['list']
        for li in li_list:
            temp = info_dicts.copy()
            temp["task_tag"] = temp["task_tag_next"]
            del temp["task_tag_next"]
            article_json = dict()
            href = li['doc_pub_url']
            base_url = 'https://www.zunyi.gov.cn'
            # doc_pub_url may be absolute or site-relative
            if 'http' in href:
                url = href
            else:
                url = base_url + href
            if 'htm' not in url:
                continue
            # rawid = detail-page file name without its extension
            rawid = re.findall(r'(.*?)\.', url.split('/')[-1])[0]
            temp["rawid"] = rawid
            temp["sub_db_id"] = '99596'
            article_json["url"] = url
            article_json["title"] = li['f_202163974476']
            article_json["pub_date"] = li['save_time']
            temp["article_json"] = json.dumps(article_json, ensure_ascii=False)
            di_model_next.lists.append(temp)
        result.next_dicts.insert.append(di_model_next)

    return result


def policy_zunyiarticle_callback(callmodel: CallBackModel[PolicyArticleModel]) -> DealModel:
    """Article-stage callback for Zunyi: parsing happens in the ETL
    callback, so this stage just returns an empty result."""
    return DealModel()


def policy_zunyiarticle_etl_callback(callmodel) -> EtlDealModel:
    """ETL callback for Zunyi article detail pages.

    Extracts policy metadata from the downloaded HTML, builds rows for the
    ``policy_latest`` and ``policy_fulltext_latest`` tables, and records
    attachment info back onto the source row via an update.

    Raises a bare ``Exception`` when no fulltext container is found so the
    task is marked as failed upstream.
    """
    result = EtlDealModel()
    save_data = list()

    html = callmodel.para_dicts['data']['1_1']['html']
    article_json = json.loads(callmodel.sql_model.article_json)
    title = article_json['title']
    provider_url = article_json['url']
    pub_date = clean_pubdate(article_json['pub_date'])
    pub_year = pub_date[:4]
    res = Selector(text=html)

    # prefer the title rendered on the page; very long extractions indicate
    # the xpath grabbed extra text, so fall back to the meta tag, then to
    # the list-page title
    title = ''.join(res.xpath('//div[@class="zwxxgk_ndbgwz"]//h1//text()|//div[@class="xxgk_nr"]//h1//text()|//div[@class="ArticleTitle"]//text()|//div[contains(@class,"DocTitle")]//text()').extract()).strip()
    if len(title) >= 50:
        title = ''.join(res.xpath('//meta[@name="ArticleTitle"][1]/@content').extract()).strip()
    if not title:
        title = article_json['title'].strip()
    # two known detail-page templates, distinguished by a marker string
    if 'detail_block1' in html:
        pub_no = ''.join(res.xpath('//div[@class="detail_block1"]//span[contains(text(),"文号")]/following::span[1]//text()').extract()).strip()
        index_no = ''.join(res.xpath('//div[@class="detail_block1"]//span[contains(text(),"索引号")]/following::span[1]//text()').extract()).strip()
        subject = ''.join(res.xpath('//div[@class="detail_block1"]//span[contains(text(),"主题分类")]/following::span[1]//text()').extract()).strip()
        written_date = ''
        impl_date = ''
        # validity flag and issuing organ are embedded in inline JS variables
        legal_status = re.findall("var  isok='(.*?)'", html)
        legal_status = '有效' if legal_status and legal_status[0].strip() == '是' else ''
        organ = re.findall('var str = "(.*?)"', html)
        if not organ:
            organ = re.findall('var str_1 = "(.*?)"', html)
        organ = organ[0].strip() if organ else ''
    else:
        # default template: metadata laid out in a "XxgkTypeBox Box" table
        pub_no = ''.join(res.xpath('//div[@class="XxgkTypeBox Box"]//td[contains(text(),"发文字号：")]/following::td[1]//text()').extract()).strip()
        index_no = ''
        subject = ''.join(res.xpath('//div[@class="XxgkTypeBox Box"]//td[contains(text(),"所属领域：")]/following::td[1]//text()').extract()).strip()
        written_date = ''.join(res.xpath('//div[@class="XxgkTypeBox Box"]//td[contains(text(),"成文日期：")]/following::td[1]//text()').extract()).strip()
        impl_date = ''.join(res.xpath('//div[@class="XxgkTypeBox Box"]//td[contains(text(),"施行日期：")]/following::td[1]//text()').extract()).strip()
        legal_status = ''.join(res.xpath('//div[@class="XxgkTypeBox Box"]//td[contains(text(),"文件状态：")]/following::td[1]//text()').extract()).strip()
        organ = ''.join(res.xpath('//div[@class="XxgkTypeBox Box"]//td[contains(text(),"制定机关：")]/following::td[1]//text()').extract()).strip()
    # organ names starting with 市 ("city") lack the city name; prefix it
    if organ.startswith('市'):
        organ = '遵义' + organ

    fulltext_xpath = '//div[@id="Zoom"]|//font[@id="Zoom"]'
    fulltext = res.xpath(fulltext_xpath).extract_first()
    if not fulltext:
        raise Exception

    down_date_str = time.strftime('%Y%m%d_%H%M%S', time.localtime())
    sub_db_id = '99596'
    rawid = callmodel.sql_model.rawid
    lngid = BaseLngid().GetLngid(sub_db_id, rawid)
    product = "ZUNYI"
    zt_provider = "zunyigovpolicy"
    data = init_data(rawid, lngid, sub_db_id, down_date_str, product, zt_provider)
    print(lngid)

    data['title'] = title
    data['provider_url'] = provider_url
    data['pub_date'] = pub_date
    data['pub_year'] = pub_year
    data['pub_no'] = pub_no
    data['organ'] = organ
    data['index_no'] = index_no
    data['written_date'] = clean_pubdate(written_date)
    data['impl_date'] = clean_pubdate(impl_date)
    # data['invalid_date'] = clean_pubdate(invalid_date)
    data['subject'] = subject
    # data['subject_word'] = subject_word
    data['legal_status'] = legal_status

    save_data.append({'table': 'policy_latest', 'data': data})
    full_text_data = init_full_text_data(lngid, sub_db_id, down_date_str, fulltext, pub_year)
    save_data.append({'table': 'policy_fulltext_latest', 'data': full_text_data})
    result.save_data = save_data

    # record attachment info (or an empty dict) back onto the source row
    file_info = get_file_info(data, res, f'({fulltext_xpath})')
    di_model_bef = DealUpdateModel()
    if file_info:
        di_model_bef.update.update({"other_dicts": json.dumps(file_info, ensure_ascii=False)})
    else:
        di_model_bef.update.update({"other_dicts": "{}"})
    di_model_bef.where.update({"rawid": callmodel.sql_model.rawid, "task_tag": callmodel.sql_model.task_tag,
                               "task_name": callmodel.sql_model.task_name})
    result.befor_dicts.update_list.append(di_model_bef)
    return result


#   贵州省六盘水市
def policy_gzlpslist_callback(callmodel: CallBackModel[PolicyListModel]) -> DealModel:
    """List-page callback for Liupanshui (六盘水), Guizhou province.

    On the first page (page_index == 0) the total page count is read from the
    embedded ``createPageHTML(<n>, ...)`` JS call and the remaining list pages
    are scheduled; every list entry then becomes one article task for the next
    crawl stage. Interpretation pages (``jdhy/jdzc``) use a table layout, the
    other sections use ``<ul>/<li>`` lists.
    """
    result = DealModel()
    para_dicts = callmodel.para_dicts
    task_info = callmodel.redis_all.parse_dict["1_1"].task_info
    info_dicts = {"task_name": callmodel.sql_model.task_name,
                  "task_tag": callmodel.sql_model.task_tag,
                  "task_tag_next": task_info.task_tag_next}
    if "1_1" in para_dicts["data"]:
        html = para_dicts["data"]["1_1"]['html']
        # Raw string: '\(' is an invalid escape sequence in a plain literal.
        max_count = re.findall(r'createPageHTML\((\d+)', html)
        total_page = int(max_count[0]) if max_count else 1
        page_index = int(callmodel.sql_model.page_index)
        if page_index == 0:
            # First page fans out tasks for all the remaining list pages.
            sql_dict = callmodel.sql_model.dict()
            di_model_bef = DealInsertModel()
            di_model_bef.insert_pre = CoreSqlValue.insert_ig_it
            sql_dict = deal_sql_dict(sql_dict)
            list_json = json.loads(callmodel.sql_model.list_json)
            for page in range(page_index + 1, total_page):
                sql_dict["page"] = total_page
                sql_dict["page_index"] = page
                dic = {"page_info": f"{list_json['page_info']}_{page}"}
                sql_dict["list_json"] = json.dumps(dic, ensure_ascii=False)
                di_model_bef.lists.append(sql_dict.copy())
            result.befor_dicts.insert.append(di_model_bef)
        di_model_next = DealInsertModel()
        di_model_next.insert_pre = CoreSqlValue.insert_ig_it
        res = Selector(text=html)
        if 'jdhy/jdzc' in callmodel.sql_model.list_rawid:
            # Interpretation pages: table layout, first row is the header.
            li_list = res.xpath('//div[@class="dwdjd"]/table/tr')[1:]
            for li in li_list:
                temp = info_dicts.copy()
                temp["task_tag"] = temp["task_tag_next"]
                del temp["task_tag_next"]
                article_json = dict()
                href = li.xpath('td[1]/a/@href').extract_first()
                if not href:
                    # Entry without a link cannot be crawled; don't let one
                    # malformed row abort the whole page.
                    continue
                base_url = f'http://www.gzlps.gov.cn/{callmodel.sql_model.list_rawid}/index.html'
                url = parse.urljoin(base_url, href)
                if 'htm' not in url:
                    continue
                rawid = re.findall(r'(.*?)\.', url.split('/')[-1])[0]
                temp["rawid"] = rawid
                temp["sub_db_id"] = '99597'
                article_json["url"] = url
                article_json["title"] = li.xpath('td[1]/a/@title').extract_first().strip()
                article_json["pub_date"] = li.xpath('td[4]/text()').extract_first().strip()
                temp["article_json"] = json.dumps(article_json, ensure_ascii=False)
                di_model_next.lists.append(temp)
        else:
            li_list = res.xpath('//div[@class="infoList"]/ul/li|//div[@class="zfxxgk_zdgkc"]/ul/li')
            for li in li_list:
                temp = info_dicts.copy()
                temp["task_tag"] = temp["task_tag_next"]
                del temp["task_tag_next"]
                article_json = dict()
                href = li.xpath('a/@href').extract_first()
                if not href:
                    continue
                base_url = f'http://www.gzlps.gov.cn/{callmodel.sql_model.list_rawid}/index.html'
                url = parse.urljoin(base_url, href)
                if 'htm' not in url:
                    continue
                rawid = re.findall(r'(.*?)\.', url.split('/')[-1])[0]
                temp["rawid"] = rawid
                temp["sub_db_id"] = '99597'
                article_json["url"] = url
                article_json["title"] = li.xpath('a/@title').extract_first().strip()
                article_json["pub_date"] = li.xpath('span/text()|b/text()').extract_first().strip()
                temp["article_json"] = json.dumps(article_json, ensure_ascii=False)
                di_model_next.lists.append(temp)
        result.next_dicts.insert.append(di_model_next)

    return result


def policy_gzlpsarticle_callback(callmodel: CallBackModel[PolicyArticleModel]) -> DealModel:
    """Article-stage callback for Liupanshui; all extraction happens in the
    corresponding ``*_etl_callback``, so this stage has nothing to do."""
    return DealModel()


def policy_gzlpsarticle_etl_callback(callmodel) -> EtlDealModel:
    """ETL callback for Liupanshui (六盘水) policy articles.

    Extracts metadata (title, document number, index number, subject,
    written date, issuing organ, legal status) and the full-text HTML from
    the fetched page, stages rows for ``policy_latest`` /
    ``policy_fulltext_latest``, and writes attachment info back to the task
    row.

    Raises:
        Exception: if the full-text container cannot be located.
    """
    result = EtlDealModel()
    save_data = list()

    html = callmodel.para_dicts['data']['1_1']['html']
    article_json = json.loads(callmodel.sql_model.article_json)
    provider_url = article_json['url']
    pub_date = clean_pubdate(article_json['pub_date'])
    pub_year = pub_date[:4]
    res = Selector(text=html)

    # Prefer the on-page title; fall back to the title from the list page.
    title = ''.join(res.xpath('//div[@class="zwxxgk_ndbgwz"]//h1//text()|//div[@class="Article_bt"]//text()').extract()).strip()
    if not title:
        title = article_json['title'].strip()

    pub_no = ''.join(res.xpath('//table[@class="layui-table"]//strong[contains(text(),"文　　号：")]/following::td[1]//text()').extract()).strip()
    index_no = ''.join(res.xpath('//table[@class="layui-table"]//strong[contains(text(),"索 引 号：")]/following::td[1]//text()').extract()).strip()
    # Subject classification is embedded in inline JS under one of two forms.
    subject = re.findall("var xxfl = '(.*?)'", html)
    if not subject:
        # Raw string: '\?' is an invalid escape sequence in a plain literal.
        subject = re.findall(r"\? xxfl='(.*?)'", html)
    subject = subject[0].strip() if subject else ''
    written_date = ''.join(res.xpath('//table[@class="layui-table"]//strong[contains(text(),"生成日期：")]/following::td[1]//text()').extract()).strip()
    legal_status = re.findall("var isok = '(.*?)'", html)
    legal_status = '有效' if legal_status and legal_status[0].strip() == '是' else ''
    organ = re.findall('var str = "(.*?)"', html)
    if not organ:
        organ = re.findall('var str_1 = "(.*?)"', html)
    organ = organ[0].strip() if organ else ''
    if organ.startswith('市'):
        # Qualify bare "市..." organ names with the city.
        organ = '六盘水' + organ

    fulltext_xpath = '//div[@id="Zoom"]|//font[@id="Zoom"]'
    fulltext = res.xpath(fulltext_xpath).extract_first()
    if not fulltext:
        raise Exception(f'fulltext container not found: {provider_url}')

    down_date_str = time.strftime('%Y%m%d_%H%M%S', time.localtime())
    sub_db_id = '99597'
    rawid = callmodel.sql_model.rawid
    lngid = BaseLngid().GetLngid(sub_db_id, rawid)
    product = "GZLPS"
    zt_provider = "gzlpsgovpolicy"
    data = init_data(rawid, lngid, sub_db_id, down_date_str, product, zt_provider)
    print(lngid)

    data['title'] = title
    data['provider_url'] = provider_url
    data['pub_date'] = pub_date
    data['pub_year'] = pub_year
    data['pub_no'] = pub_no
    data['organ'] = organ
    data['index_no'] = index_no
    data['written_date'] = clean_pubdate(written_date)
    data['subject'] = subject
    data['legal_status'] = legal_status

    save_data.append({'table': 'policy_latest', 'data': data})
    full_text_data = init_full_text_data(lngid, sub_db_id, down_date_str, fulltext, pub_year)
    save_data.append({'table': 'policy_fulltext_latest', 'data': full_text_data})
    result.save_data = save_data

    # Record attachment info (if any) back onto the originating task row.
    file_info = get_file_info(data, res, f'({fulltext_xpath})')
    di_model_bef = DealUpdateModel()
    if file_info:
        di_model_bef.update.update({"other_dicts": json.dumps(file_info, ensure_ascii=False)})
    else:
        di_model_bef.update.update({"other_dicts": "{}"})
    di_model_bef.where.update({"rawid": callmodel.sql_model.rawid, "task_tag": callmodel.sql_model.task_tag,
                               "task_name": callmodel.sql_model.task_name})
    result.befor_dicts.update_list.append(di_model_bef)
    return result


#   贵州省安顺市
def policy_anshunlist_callback(callmodel: CallBackModel[PolicyListModel]) -> DealModel:
    """List-page callback for Anshun (安顺), Guizhou province.

    On the first page (page_index == 0) the total page count is read from the
    embedded ``createPageHTML(<n>, ...)`` JS call and the remaining list pages
    are scheduled; every list entry then becomes one article task for the next
    crawl stage.
    """
    result = DealModel()
    para_dicts = callmodel.para_dicts
    task_info = callmodel.redis_all.parse_dict["1_1"].task_info
    info_dicts = {"task_name": callmodel.sql_model.task_name,
                  "task_tag": callmodel.sql_model.task_tag,
                  "task_tag_next": task_info.task_tag_next}
    if "1_1" in para_dicts["data"]:
        html = para_dicts["data"]["1_1"]['html']
        # Raw string: '\(' is an invalid escape sequence in a plain literal.
        max_count = re.findall(r'createPageHTML\((\d+)', html)
        total_page = int(max_count[0]) if max_count else 1
        page_index = int(callmodel.sql_model.page_index)
        if page_index == 0:
            # First page fans out tasks for all the remaining list pages.
            sql_dict = callmodel.sql_model.dict()
            di_model_bef = DealInsertModel()
            di_model_bef.insert_pre = CoreSqlValue.insert_ig_it
            sql_dict = deal_sql_dict(sql_dict)
            list_json = json.loads(callmodel.sql_model.list_json)
            for page in range(page_index + 1, total_page):
                sql_dict["page"] = total_page
                sql_dict["page_index"] = page
                dic = {"page_info": f"{list_json['page_info']}_{page}"}
                sql_dict["list_json"] = json.dumps(dic, ensure_ascii=False)
                di_model_bef.lists.append(sql_dict.copy())
            result.befor_dicts.insert.append(di_model_bef)
        di_model_next = DealInsertModel()
        di_model_next.insert_pre = CoreSqlValue.insert_ig_it
        res = Selector(text=html)
        li_list = res.xpath('//div[@class="NewsList"]/ul/li|//div[@class="zfxxgk_zdgkc"]/ul/li|//div[@class="zcjd_list"]/ul/li')
        for li in li_list:
            temp = info_dicts.copy()
            temp["task_tag"] = temp["task_tag_next"]
            del temp["task_tag_next"]
            article_json = dict()
            href = li.xpath('h2/a/@href|a/@href').extract_first()
            if not href:
                # Entry without a link cannot be crawled; don't let one
                # malformed row abort the whole page.
                continue
            base_url = 'http://www.anshun.gov.cn'
            # Some entries carry absolute URLs, the rest are site-relative.
            url = href if 'http' in href else base_url + href
            if 'htm' not in url:
                continue
            rawid = re.findall(r'(.*?)\.', url.split('/')[-1])[0]
            temp["rawid"] = rawid
            temp["sub_db_id"] = '99598'
            article_json["url"] = url
            article_json["title"] = li.xpath('h2/a/@title|a/@title').extract_first().strip()
            article_json["pub_date"] = li.xpath('h2/span/text()|span/text()|b/text()').extract_first().strip()
            temp["article_json"] = json.dumps(article_json, ensure_ascii=False)
            di_model_next.lists.append(temp)
        result.next_dicts.insert.append(di_model_next)

    return result


def policy_anshunarticle_callback(callmodel: CallBackModel[PolicyArticleModel]) -> DealModel:
    """Article-stage callback for Anshun; all extraction happens in the
    corresponding ``*_etl_callback``, so this stage has nothing to do."""
    return DealModel()


def policy_anshunarticle_etl_callback(callmodel) -> EtlDealModel:
    """ETL callback for Anshun (安顺) policy articles.

    Extracts metadata (title, document number, index number, subject, issuing
    organ, legal status) and the full-text HTML from the fetched page, stages
    rows for ``policy_latest`` / ``policy_fulltext_latest``, and writes
    attachment info back to the task row.

    Raises:
        Exception: if the full-text container cannot be located.
    """
    result = EtlDealModel()
    save_data = list()

    html = callmodel.para_dicts['data']['1_1']['html']
    article_json = json.loads(callmodel.sql_model.article_json)
    provider_url = article_json['url']
    pub_date = clean_pubdate(article_json['pub_date'])
    pub_year = pub_date[:4]
    res = Selector(text=html)

    # Prefer the on-page title; fall back to the title from the list page.
    title = ''.join(res.xpath('//div[@class="title"]//text()').extract()).strip()
    if not title:
        title = article_json['title'].strip()

    pub_no = ''.join(res.xpath('//div[@class="Article_xx"]//strong[contains(text(),"文　　号：")]/following::td[1]//text()').extract()).strip()
    index_no = ''.join(res.xpath('//div[@class="Article_xx"]//strong[contains(text(),"索 引 号：")]/following::td[1]//text()').extract()).strip()
    subject = ''.join(res.xpath('//div[@class="Article_xx"]//strong[contains(text(),"信息分类：")]/following::td[1]//text()').extract()).strip()
    legal_status = re.findall("var  isok='(.*?)'", html)
    if not legal_status:
        legal_status = re.findall("var  isok1='(.*?)'", html)
    legal_status = '有效' if legal_status and legal_status[0].strip() == '是' else ''
    # Bug fix: the old code indexed the findall result with [0] immediately
    # (IndexError when the pattern is absent, making the fallback pattern
    # unreachable) and then took organ[0] of the resulting string, which
    # truncated the organ name to its first character. Use the same
    # match-list pattern as the sibling callbacks instead.
    organ = re.findall('var str = "(.*?)"', html)
    if not organ:
        organ = re.findall('var str_1 = "(.*?)"', html)
    organ = organ[0].strip() if organ else ''
    if organ.startswith('市'):
        # Qualify bare "市..." organ names with the city.
        organ = '安顺' + organ

    fulltext_xpath = '//div[@id="Zoom"]|//font[@id="Zoom"]'
    fulltext = res.xpath(fulltext_xpath).extract_first()
    if not fulltext:
        raise Exception(f'fulltext container not found: {provider_url}')

    down_date_str = time.strftime('%Y%m%d_%H%M%S', time.localtime())
    sub_db_id = '99598'
    rawid = callmodel.sql_model.rawid
    lngid = BaseLngid().GetLngid(sub_db_id, rawid)
    product = "ANSHUN"
    zt_provider = "anshungovpolicy"
    data = init_data(rawid, lngid, sub_db_id, down_date_str, product, zt_provider)
    print(lngid)

    data['title'] = title
    data['provider_url'] = provider_url
    data['pub_date'] = pub_date
    data['pub_year'] = pub_year
    data['pub_no'] = pub_no
    data['organ'] = organ
    data['index_no'] = index_no
    data['subject'] = subject
    data['legal_status'] = legal_status

    save_data.append({'table': 'policy_latest', 'data': data})
    full_text_data = init_full_text_data(lngid, sub_db_id, down_date_str, fulltext, pub_year)
    save_data.append({'table': 'policy_fulltext_latest', 'data': full_text_data})
    result.save_data = save_data

    # Record attachment info (if any) back onto the originating task row.
    file_info = get_file_info(data, res, f'({fulltext_xpath})')
    di_model_bef = DealUpdateModel()
    if file_info:
        di_model_bef.update.update({"other_dicts": json.dumps(file_info, ensure_ascii=False)})
    else:
        di_model_bef.update.update({"other_dicts": "{}"})
    di_model_bef.where.update({"rawid": callmodel.sql_model.rawid, "task_tag": callmodel.sql_model.task_tag,
                               "task_name": callmodel.sql_model.task_name})
    result.befor_dicts.update_list.append(di_model_bef)
    return result


#   贵州省毕节市
def policy_bijielist_callback(callmodel: CallBackModel[PolicyListModel]) -> DealModel:
    """List-page callback for Bijie (毕节), Guizhou province.

    On the first page (page_index == 0) the total page count is read from the
    embedded ``createPageHTML(<n>, ...)`` JS call and the remaining list pages
    are scheduled; every list entry then becomes one article task for the next
    crawl stage.
    """
    result = DealModel()
    para_dicts = callmodel.para_dicts
    task_info = callmodel.redis_all.parse_dict["1_1"].task_info
    info_dicts = {"task_name": callmodel.sql_model.task_name,
                  "task_tag": callmodel.sql_model.task_tag,
                  "task_tag_next": task_info.task_tag_next}
    if "1_1" in para_dicts["data"]:
        html = para_dicts["data"]["1_1"]['html']
        # Raw string: '\(' is an invalid escape sequence in a plain literal.
        max_count = re.findall(r'createPageHTML\((\d+)', html)
        total_page = int(max_count[0]) if max_count else 1
        page_index = int(callmodel.sql_model.page_index)
        if page_index == 0:
            # First page fans out tasks for all the remaining list pages.
            sql_dict = callmodel.sql_model.dict()
            di_model_bef = DealInsertModel()
            di_model_bef.insert_pre = CoreSqlValue.insert_ig_it
            sql_dict = deal_sql_dict(sql_dict)
            list_json = json.loads(callmodel.sql_model.list_json)
            for page in range(page_index + 1, total_page):
                sql_dict["page"] = total_page
                sql_dict["page_index"] = page
                dic = {"page_info": f"{list_json['page_info']}_{page}"}
                sql_dict["list_json"] = json.dumps(dic, ensure_ascii=False)
                di_model_bef.lists.append(sql_dict.copy())
            result.befor_dicts.insert.append(di_model_bef)
        di_model_next = DealInsertModel()
        di_model_next.insert_pre = CoreSqlValue.insert_ig_it
        res = Selector(text=html)
        li_list = res.xpath('//ul[@class="NewsList"]/li|//div[@class="zcjd_list"]/ul/li')
        for li in li_list:
            temp = info_dicts.copy()
            temp["task_tag"] = temp["task_tag_next"]
            del temp["task_tag_next"]
            article_json = dict()
            href = li.xpath('h2/a/@href|a/@href').extract_first()
            if not href:
                # Entry without a link cannot be crawled.
                continue
            base_url = f'https://www.bijie.gov.cn/{callmodel.sql_model.list_rawid}/index.html'
            url = parse.urljoin(base_url, href)
            if 'htm' not in url:
                continue
            rawid = re.findall(r'(.*?)\.', url.split('/')[-1])[0]
            temp["rawid"] = rawid
            temp["sub_db_id"] = '99599'
            article_json["url"] = url
            article_json["title"] = li.xpath('h2/a/@title|a/@title').extract_first().strip()
            article_json["pub_date"] = li.xpath('h2/span/text()|span/text()').extract_first().strip()
            temp["article_json"] = json.dumps(article_json, ensure_ascii=False)
            di_model_next.lists.append(temp)
        result.next_dicts.insert.append(di_model_next)

    return result


def policy_bijiearticle_callback(callmodel: CallBackModel[PolicyArticleModel]) -> DealModel:
    """Article-stage callback for Bijie; all extraction happens in the
    corresponding ``*_etl_callback``, so this stage has nothing to do."""
    return DealModel()


def policy_bijiearticle_etl_callback(callmodel) -> EtlDealModel:
    """ETL callback for Bijie (毕节) policy articles.

    Extracts metadata (title, document number, index number, subject,
    written date, issuing organ, legal status) and the full-text HTML from
    the fetched page, stages rows for ``policy_latest`` /
    ``policy_fulltext_latest``, and writes attachment info back to the task
    row.

    Raises:
        Exception: if the full-text container cannot be located.
    """
    result = EtlDealModel()
    save_data = list()

    html = callmodel.para_dicts['data']['1_1']['html']
    article_json = json.loads(callmodel.sql_model.article_json)
    provider_url = article_json['url']
    pub_date = clean_pubdate(article_json['pub_date'])
    pub_year = pub_date[:4]
    res = Selector(text=html)

    # Prefer the on-page title; fall back to the title from the list page.
    title = ''.join(res.xpath('//div[@class="title"]//text()|//div[@class="DocTitle"]//text()').extract()).strip()
    if not title:
        title = article_json['title'].strip()

    pub_no = ''.join(res.xpath('//div[@class="Xxgk_Info Box MT20"]//li[contains(text(),"文") and contains(text(),"号：")]/following::li[1]//text()').extract()).strip()
    index_no = ''.join(res.xpath('//div[@class="Xxgk_Info Box MT20"]//li[contains(text(),"索") and contains(text(),"号：")]/following::li[1]//text()').extract()).strip()
    subject = ''.join(res.xpath('//div[@class="Xxgk_Info Box MT20"]//li[contains(text(),"信息分类：")]/following::li[1]//text()').extract()).strip()
    written_date = ''.join(res.xpath('//div[@class="Xxgk_Info Box MT20"]//li[contains(text(),"生成日期：")]/following::li[1]//text()').extract()).strip()
    legal_status = re.findall("var isok='(.*?)'", html)
    legal_status = '有效' if legal_status and legal_status[0].strip() == '是' else ''
    organ = re.findall("var Publisher='(.*?)'", html)
    organ = organ[0].strip() if organ else ''
    if organ.startswith('市'):
        # Qualify bare "市..." organ names with the city.
        organ = '毕节' + organ

    fulltext_xpath = '//div[@id="Zoom"]|//font[@id="Zoom"]'
    fulltext = res.xpath(fulltext_xpath).extract_first()
    if not fulltext:
        raise Exception(f'fulltext container not found: {provider_url}')

    down_date_str = time.strftime('%Y%m%d_%H%M%S', time.localtime())
    sub_db_id = '99599'
    rawid = callmodel.sql_model.rawid
    lngid = BaseLngid().GetLngid(sub_db_id, rawid)
    product = "BIJIE"
    zt_provider = "bijiegovpolicy"
    data = init_data(rawid, lngid, sub_db_id, down_date_str, product, zt_provider)
    print(lngid)

    data['title'] = title
    data['provider_url'] = provider_url
    data['pub_date'] = pub_date
    data['pub_year'] = pub_year
    data['pub_no'] = pub_no
    data['organ'] = organ
    data['index_no'] = index_no
    data['written_date'] = clean_pubdate(written_date)
    data['subject'] = subject
    data['legal_status'] = legal_status

    save_data.append({'table': 'policy_latest', 'data': data})
    full_text_data = init_full_text_data(lngid, sub_db_id, down_date_str, fulltext, pub_year)
    save_data.append({'table': 'policy_fulltext_latest', 'data': full_text_data})
    result.save_data = save_data

    # Record attachment info (if any) back onto the originating task row.
    file_info = get_file_info(data, res, f'({fulltext_xpath})')
    di_model_bef = DealUpdateModel()
    if file_info:
        di_model_bef.update.update({"other_dicts": json.dumps(file_info, ensure_ascii=False)})
    else:
        di_model_bef.update.update({"other_dicts": "{}"})
    di_model_bef.where.update({"rawid": callmodel.sql_model.rawid, "task_tag": callmodel.sql_model.task_tag,
                               "task_name": callmodel.sql_model.task_name})
    result.befor_dicts.update_list.append(di_model_bef)
    return result


#   贵州省铜仁市
def policy_trslist_callback(callmodel: CallBackModel[PolicyListModel]) -> DealModel:
    """List-page callback for Tongren (铜仁), Guizhou province.

    On the first page (page_index == 0) the total page count is read from the
    embedded ``createPageHTML(<n>, ...)`` JS call and the remaining list pages
    are scheduled; every list entry then becomes one article task for the next
    crawl stage.
    """
    result = DealModel()
    para_dicts = callmodel.para_dicts
    task_info = callmodel.redis_all.parse_dict["1_1"].task_info
    info_dicts = {"task_name": callmodel.sql_model.task_name,
                  "task_tag": callmodel.sql_model.task_tag,
                  "task_tag_next": task_info.task_tag_next}
    if "1_1" in para_dicts["data"]:
        html = para_dicts["data"]["1_1"]['html']
        # Raw string: '\(' is an invalid escape sequence in a plain literal.
        max_count = re.findall(r'createPageHTML\((\d+)', html)
        total_page = int(max_count[0]) if max_count else 1
        page_index = int(callmodel.sql_model.page_index)
        if page_index == 0:
            # First page fans out tasks for all the remaining list pages.
            sql_dict = callmodel.sql_model.dict()
            di_model_bef = DealInsertModel()
            di_model_bef.insert_pre = CoreSqlValue.insert_ig_it
            sql_dict = deal_sql_dict(sql_dict)
            list_json = json.loads(callmodel.sql_model.list_json)
            for page in range(page_index + 1, total_page):
                sql_dict["page"] = total_page
                sql_dict["page_index"] = page
                dic = {"page_info": f"{list_json['page_info']}_{page}"}
                sql_dict["list_json"] = json.dumps(dic, ensure_ascii=False)
                di_model_bef.lists.append(sql_dict.copy())
            result.befor_dicts.insert.append(di_model_bef)
        di_model_next = DealInsertModel()
        di_model_next.insert_pre = CoreSqlValue.insert_ig_it
        res = Selector(text=html)
        li_list = res.xpath('//ul[@class="NewsList"]/li|//div[@class="zfxxgk_zdgkc"]/ul/li')
        for li in li_list:
            temp = info_dicts.copy()
            temp["task_tag"] = temp["task_tag_next"]
            del temp["task_tag_next"]
            article_json = dict()
            href = li.xpath('a/@href').extract_first()
            if not href:
                # Entry without a link cannot be crawled.
                continue
            base_url = f'http://www.trs.gov.cn/{callmodel.sql_model.list_rawid}/index.html'
            url = parse.urljoin(base_url, href)
            if 'htm' not in url:
                continue
            rawid = re.findall(r'(.*?)\.', url.split('/')[-1])[0]
            temp["rawid"] = rawid
            temp["sub_db_id"] = '99600'
            article_json["url"] = url
            article_json["title"] = li.xpath('a/@title').extract_first().strip()
            article_json["pub_date"] = li.xpath('span/text()|b/text()').extract_first().strip()
            temp["article_json"] = json.dumps(article_json, ensure_ascii=False)
            di_model_next.lists.append(temp)
        result.next_dicts.insert.append(di_model_next)

    return result


def policy_trsarticle_callback(callmodel: CallBackModel[PolicyArticleModel]) -> DealModel:
    """Article-stage callback for Tongren; all extraction happens in the
    corresponding ``*_etl_callback``, so this stage has nothing to do."""
    return DealModel()


def policy_trsarticle_etl_callback(callmodel) -> EtlDealModel:
    """ETL callback for Tongren (铜仁) policy articles.

    Tongren serves two article templates: pages containing a
    ``detail_block1`` div carry metadata in span pairs plus inline JS, the
    legacy template carries it in a ``tablec`` table. Extracts metadata and
    the full-text HTML, stages rows for ``policy_latest`` /
    ``policy_fulltext_latest``, and writes attachment info back to the task
    row.

    Raises:
        Exception: if the full-text container cannot be located.
    """
    result = EtlDealModel()
    save_data = list()

    html = callmodel.para_dicts['data']['1_1']['html']
    article_json = json.loads(callmodel.sql_model.article_json)
    provider_url = article_json['url']
    pub_date = clean_pubdate(article_json['pub_date'])
    pub_year = pub_date[:4]
    res = Selector(text=html)

    # Prefer the on-page title; fall back to the title from the list page.
    title = ''.join(res.xpath('//div[@class="xxgk_nr"]//h1//text()|//div[@class="zwxxgk_ndbgwz"]//h1//text()|//div[@class="ArticleTitle"]//text()').extract()).strip()
    if not title:
        title = article_json['title'].strip()

    if 'detail_block1' in html:
        # Newer template: metadata in span pairs, organ/status in inline JS.
        pub_no = ''.join(res.xpath('//div[@class="detail_block1"]//span[contains(text(),"文号")]/following::span[1]//text()').extract()).strip()
        index_no = ''.join(res.xpath('//div[@class="detail_block1"]//span[contains(text(),"索引号")]/following::span[1]//text()').extract()).strip()
        subject = ''.join(res.xpath('//div[@class="detail_block1"]//span[contains(text(),"主题分类")]/following::span[1]//text()').extract()).strip()
        written_date = ''
        legal_status = re.findall("var  isok='(.*?)'", html)
        legal_status = '有效' if legal_status and legal_status[0].strip() == '是' else ''
        organ = re.findall('var str = "(.*?)"', html)
        if not organ:
            organ = re.findall('var str_1 = "(.*?)"', html)
        organ = organ[0].strip() if organ else ''
    else:
        # Legacy template: metadata in a "tablec" table.
        pub_no = ''.join(res.xpath('//table[@class="tablec"]//td[contains(text(),"文 号：")]/following::td[1]//text()').extract()).strip()
        index_no = ''
        subject = ''
        written_date = ''.join(res.xpath('//table[@class="tablec"]//td[contains(text(),"成文日期：")]/following::td[1]//text()').extract()).strip()
        legal_status = ''.join(res.xpath('//table[@class="tablec"]//td[contains(text(),"有效性：")]/following::td[1]//text()').extract()).strip()
        legal_status = '有效' if legal_status == '是' else ''
        organ = ''.join(res.xpath('//table[@class="tablec"]//td[contains(text(),"发布机构：")]/following::td[1]//text()').extract()).strip()
    if organ.startswith('市'):
        # Qualify bare "市..." organ names with the city.
        organ = '铜仁' + organ

    fulltext_xpath = '//div[@id="Zoom"]|//font[@id="Zoom"]'
    fulltext = res.xpath(fulltext_xpath).extract_first()
    if not fulltext:
        raise Exception(f'fulltext container not found: {provider_url}')

    down_date_str = time.strftime('%Y%m%d_%H%M%S', time.localtime())
    sub_db_id = '99600'
    rawid = callmodel.sql_model.rawid
    lngid = BaseLngid().GetLngid(sub_db_id, rawid)
    product = "TRS"
    zt_provider = "trsgovpolicy"
    data = init_data(rawid, lngid, sub_db_id, down_date_str, product, zt_provider)
    print(lngid)

    data['title'] = title
    data['provider_url'] = provider_url
    data['pub_date'] = pub_date
    data['pub_year'] = pub_year
    data['pub_no'] = pub_no
    data['organ'] = organ
    data['index_no'] = index_no
    data['written_date'] = clean_pubdate(written_date)
    data['subject'] = subject
    data['legal_status'] = legal_status

    save_data.append({'table': 'policy_latest', 'data': data})
    full_text_data = init_full_text_data(lngid, sub_db_id, down_date_str, fulltext, pub_year)
    save_data.append({'table': 'policy_fulltext_latest', 'data': full_text_data})
    result.save_data = save_data

    # Record attachment info (if any) back onto the originating task row.
    file_info = get_file_info(data, res, f'({fulltext_xpath})')
    di_model_bef = DealUpdateModel()
    if file_info:
        di_model_bef.update.update({"other_dicts": json.dumps(file_info, ensure_ascii=False)})
    else:
        di_model_bef.update.update({"other_dicts": "{}"})
    di_model_bef.where.update({"rawid": callmodel.sql_model.rawid, "task_tag": callmodel.sql_model.task_tag,
                               "task_name": callmodel.sql_model.task_name})
    result.befor_dicts.update_list.append(di_model_bef)
    return result


#   贵州省黔东南苗族侗族自治州
def policy_qdnlist_callback(callmodel: CallBackModel[PolicyListModel]) -> DealModel:
    """List-page callback for Qiandongnan prefecture (黔东南苗族侗族自治州).

    The total page count is read from either a
    ``createPageHTML(parseInt("<n>"...)`` JS call or a ``pagenav_tail``
    link; on the first page (page_index == 0) the remaining list pages are
    scheduled, and every list entry becomes one article task for the next
    crawl stage.
    """
    result = DealModel()
    para_dicts = callmodel.para_dicts
    task_info = callmodel.redis_all.parse_dict["1_1"].task_info
    info_dicts = {"task_name": callmodel.sql_model.task_name,
                  "task_tag": callmodel.sql_model.task_tag,
                  "task_tag_next": task_info.task_tag_next}
    if "1_1" in para_dicts["data"]:
        html = para_dicts["data"]["1_1"]['html']
        # Raw strings: '\(' is an invalid escape sequence in a plain literal.
        max_count = re.findall(r'createPageHTML\(parseInt\("(\d+)', html)
        if not max_count:
            max_count = re.findall(r"pagenav_tail.*?index_(\d+)", html)
        total_page = int(max_count[0]) if max_count else 1
        page_index = int(callmodel.sql_model.page_index)
        if page_index == 0:
            # First page fans out tasks for all the remaining list pages.
            sql_dict = callmodel.sql_model.dict()
            di_model_bef = DealInsertModel()
            di_model_bef.insert_pre = CoreSqlValue.insert_ig_it
            sql_dict = deal_sql_dict(sql_dict)
            list_json = json.loads(callmodel.sql_model.list_json)
            for page in range(page_index + 1, total_page):
                sql_dict["page"] = total_page
                sql_dict["page_index"] = page
                dic = {"page_info": f"{list_json['page_info']}_{page}"}
                sql_dict["list_json"] = json.dumps(dic, ensure_ascii=False)
                di_model_bef.lists.append(sql_dict.copy())
            result.befor_dicts.insert.append(di_model_bef)
        di_model_next = DealInsertModel()
        di_model_next.insert_pre = CoreSqlValue.insert_ig_it
        res = Selector(text=html)
        li_list = res.xpath('//ul[@class="ul_1"]/li|//b/parent::em[1]|//div[@class="zdly-center"]/ul/li')
        for li in li_list:
            temp = info_dicts.copy()
            temp["task_tag"] = temp["task_tag_next"]
            del temp["task_tag_next"]
            article_json = dict()
            href = li.xpath('em/a/@href|a/@href').extract_first()
            if not href:
                # Entry without a link cannot be crawled; don't let one
                # malformed row abort the whole page.
                continue
            base_url = 'http://www.qdn.gov.cn'
            # Some entries carry absolute URLs, the rest are site-relative.
            url = href if 'http' in href else base_url + href
            if 'htm' not in url:
                continue
            rawid = re.findall(r'(.*?)\.', url.split('/')[-1])[0]
            temp["rawid"] = rawid
            temp["sub_db_id"] = '99601'
            article_json["url"] = url
            article_json["title"] = li.xpath('em/a/@title|a/@title').extract_first().strip()
            article_json["pub_date"] = li.xpath('span/text()|b/text()').extract_first().strip()
            temp["article_json"] = json.dumps(article_json, ensure_ascii=False)
            di_model_next.lists.append(temp)
        result.next_dicts.insert.append(di_model_next)

    return result


def policy_qdnarticle_callback(callmodel: CallBackModel[PolicyArticleModel]) -> DealModel:
    """Article-stage callback for Qiandongnan; all extraction happens in the
    corresponding ``*_etl_callback``, so this stage has nothing to do."""
    return DealModel()


def policy_qdnarticle_etl_callback(callmodel) -> EtlDealModel:
    """ETL for Qiandongnan (黔东南) policy articles.

    Parses the fetched article HTML, extracts metadata (title, document
    number, index number, subject, legal status, issuing organ), emits
    policy/fulltext rows and writes attachment info back onto the task row.

    :param callmodel: callback model carrying the fetched page and task row
    :return: EtlDealModel with save_data and a befor_dicts update
    :raises Exception: when the fulltext container cannot be located
    """
    result = EtlDealModel()
    save_data = list()

    html = callmodel.para_dicts['data']['1_1']['html']
    article_json = json.loads(callmodel.sql_model.article_json)
    provider_url = article_json['url']
    pub_date = clean_pubdate(article_json['pub_date'])
    pub_year = pub_date[:4]
    res = Selector(text=html)

    # Title rendered on the page wins; fall back to the list-page title.
    title = ''.join(res.xpath('//div[@class="cont"]//h3//text()').extract()).strip()
    if not title:
        title = article_json['title'].strip()
    # Several fields are only present as JS variables in this template.
    pub_no = re.findall("var wh = '(.*?)'", html)
    pub_no = pub_no[0].strip() if pub_no else ''
    index_no = ''.join(res.xpath('//table[@class="wjbg"]//td[contains(text(),"索引号")]/following::td[1]//text()').extract()).strip()
    subject = ''.join(res.xpath('//table[@class="wjbg"]//td[contains(text(),"信息分类")]/following::td[1]//text()').extract()).strip()
    legal_status = re.findall('var isok = "(.*?)"', html)
    legal_status = '有效' if legal_status and legal_status[0].strip() == '是' else ''
    organ = re.findall("var fbjg = '(.*?)'", html)
    organ = organ[0].strip() if organ else ''
    # The site abbreviates the organ as "州…"; expand to the full prefecture name.
    if organ.startswith('州'):
        organ = '贵州省黔东南苗族侗族自治' + organ

    fulltext_xpath = '//div[@class="mainbox"]'
    fulltext = res.xpath(fulltext_xpath).extract_first()
    if not fulltext:
        raise Exception(f'fulltext not found: {provider_url}')

    down_date_str = time.strftime('%Y%m%d_%H%M%S', time.localtime())
    sub_db_id = '99601'
    rawid = callmodel.sql_model.rawid
    lngid = BaseLngid().GetLngid(sub_db_id, rawid)
    product = "QDN"
    zt_provider = "qdngovpolicy"
    data = init_data(rawid, lngid, sub_db_id, down_date_str, product, zt_provider)
    print(lngid)

    data['title'] = title
    data['provider_url'] = provider_url
    data['pub_date'] = pub_date
    data['pub_year'] = pub_year
    data['pub_no'] = pub_no
    data['organ'] = organ
    data['index_no'] = index_no
    data['subject'] = subject
    data['legal_status'] = legal_status

    save_data.append({'table': 'policy_latest', 'data': data})
    full_text_data = init_full_text_data(lngid, sub_db_id, down_date_str, fulltext, pub_year)
    save_data.append({'table': 'policy_fulltext_latest', 'data': full_text_data})
    result.save_data = save_data

    # Record attachment info (if any) on the originating task row.
    file_info = get_file_info(data, res, f'({fulltext_xpath})')
    di_model_bef = DealUpdateModel()
    if file_info:
        di_model_bef.update.update({"other_dicts": json.dumps(file_info, ensure_ascii=False)})
    else:
        di_model_bef.update.update({"other_dicts": "{}"})
    di_model_bef.where.update({"rawid": callmodel.sql_model.rawid, "task_tag": callmodel.sql_model.task_tag,
                               "task_name": callmodel.sql_model.task_name})
    result.befor_dicts.update_list.append(di_model_bef)
    return result


#   贵州省黔南布依族苗族自治州
def policy_qiannanlist_callback(callmodel: CallBackModel[PolicyListModel]) -> DealModel:
    """List-page callback for Qiannan (黔南) prefecture.

    On the first page, fans out insert rows for the remaining list pages;
    then extracts article URL/title/date from each list item and queues
    them for the article stage.
    """
    result = DealModel()
    para_dicts = callmodel.para_dicts
    task_info = callmodel.redis_all.parse_dict["1_1"].task_info
    info_dicts = {"task_name": callmodel.sql_model.task_name,
                  "task_tag": callmodel.sql_model.task_tag,
                  "task_tag_next": task_info.task_tag_next}
    if "1_1" in para_dicts["data"]:
        # Total page count comes from the createPageHTML(...) pager script.
        max_count = re.findall(r'createPageHTML\((\d+)', para_dicts["data"]["1_1"]['html'])
        max_count = int(max_count[0]) if max_count else 1
        total_page = max_count
        page_index = int(callmodel.sql_model.page_index)
        if page_index == 0:
            # Only the first page schedules the remaining list pages.
            sql_dict = callmodel.sql_model.dict()
            di_model_bef = DealInsertModel()
            di_model_bef.insert_pre = CoreSqlValue.insert_ig_it
            sql_dict = deal_sql_dict(sql_dict)
            list_json = json.loads(callmodel.sql_model.list_json)
            for page in range(page_index + 1, total_page):
                sql_dict["page"] = total_page
                sql_dict["page_index"] = page
                dic = {"page_info": f"{list_json['page_info']}_{page}"}
                sql_dict["list_json"] = json.dumps(dic, ensure_ascii=False)
                di_model_bef.lists.append(sql_dict.copy())
            result.befor_dicts.insert.append(di_model_bef)
        di_model_next = DealInsertModel()
        di_model_next.insert_pre = CoreSqlValue.insert_ig_it
        res = Selector(text=para_dicts["data"]["1_1"]['html'])
        li_list = res.xpath('//ul[@class="NewsList"]/li|//div[@class="zcjd_list"]/ul/li')
        for li in li_list:
            temp = info_dicts.copy()
            temp["task_tag"] = temp["task_tag_next"]
            del temp["task_tag_next"]
            article_json = dict()
            href = li.xpath('h2/a/@href|a/@href').extract_first()
            if not href:
                # Some <li> entries carry no link; skip them.
                continue
            base_url = f'http://www.qiannan.gov.cn/{callmodel.sql_model.list_rawid}/index_1.html'
            url = parse.urljoin(base_url, href)
            if 'htm' not in url:
                continue
            # rawid is the file name without its extension.
            rawid = re.findall(r'(.*?)\.', url.split('/')[-1])[0]
            temp["rawid"] = rawid
            temp["sub_db_id"] = '99602'
            article_json["url"] = url
            article_json["title"] = (li.xpath('h2/a/@title|a/@title').extract_first() or '').strip()
            article_json["pub_date"] = (li.xpath('h2/span/text()|span/text()').extract_first() or '').strip()
            temp["article_json"] = json.dumps(article_json, ensure_ascii=False)
            di_model_next.lists.append(temp)
        result.next_dicts.insert.append(di_model_next)

    return result


def policy_qiannanarticle_callback(callmodel: CallBackModel[PolicyArticleModel]) -> DealModel:
    """Article-fetch callback for Qiannan: no scheduling needed, return an empty DealModel."""
    return DealModel()


def policy_qiannanarticle_etl_callback(callmodel) -> EtlDealModel:
    """ETL for Qiannan (黔南) policy articles.

    Parses the fetched article HTML, extracts metadata, emits policy/fulltext
    rows and writes attachment info back onto the task row.

    :param callmodel: callback model carrying the fetched page and task row
    :return: EtlDealModel with save_data and a befor_dicts update
    :raises Exception: when the fulltext container cannot be located
    """
    result = EtlDealModel()
    save_data = list()

    html = callmodel.para_dicts['data']['1_1']['html']
    article_json = json.loads(callmodel.sql_model.article_json)
    provider_url = article_json['url']
    pub_date = clean_pubdate(article_json['pub_date'])
    pub_year = pub_date[:4]
    res = Selector(text=html)

    # Title rendered on the page wins; fall back to the list-page title.
    title = ''.join(res.xpath('//div[@class="DocTitle"]//text()').extract()).strip()
    if not title:
        title = article_json['title'].strip()
    pub_no = ''.join(res.xpath('//div[@class="Xxgk_Info Box MT20"]//li[contains(text(),"文") and contains(text(),"号：")]/following::li[1]//text()').extract()).strip()
    index_no = ''.join(res.xpath('//div[@class="Xxgk_Info Box MT20"]//li[contains(text(),"索") and contains(text(),"号：")]/following::li[1]//text()').extract()).strip()
    subject = ''.join(res.xpath('//div[@class="Xxgk_Info Box MT20"]//li[contains(text(),"信息分类：")]/following::li[1]//text()').extract()).strip()
    legal_status = re.findall("var isok='(.*?)'", html)
    legal_status = '有效' if legal_status and legal_status[0].strip() == '是' else ''
    # Issuing organ lives in a JS variable; try the strict pattern first.
    organ = re.findall("var Publisher='(.*?)'", html)
    organ = organ[0].strip() if organ else ''
    if not organ:
        organ = re.findall("Publisher='(.*?)'", html)
        organ = organ[-1].strip() if organ else ''

    # The site abbreviates the organ as "州…"; expand to the full prefecture name.
    if organ.startswith('州'):
        organ = '贵州省黔南布依族苗族自治' + organ

    fulltext_xpath = '//div[@id="Zoom"]|//font[@id="Zoom"]'
    fulltext = res.xpath(fulltext_xpath).extract_first()
    if not fulltext:
        raise Exception(f'fulltext not found: {provider_url}')

    down_date_str = time.strftime('%Y%m%d_%H%M%S', time.localtime())
    sub_db_id = '99602'
    rawid = callmodel.sql_model.rawid
    lngid = BaseLngid().GetLngid(sub_db_id, rawid)
    product = "QIANNAN"
    zt_provider = "qiannangovpolicy"
    data = init_data(rawid, lngid, sub_db_id, down_date_str, product, zt_provider)
    print(lngid)

    data['title'] = title
    data['provider_url'] = provider_url
    data['pub_date'] = pub_date
    data['pub_year'] = pub_year
    data['pub_no'] = pub_no
    data['organ'] = organ
    data['index_no'] = index_no
    data['subject'] = subject
    data['legal_status'] = legal_status

    save_data.append({'table': 'policy_latest', 'data': data})
    full_text_data = init_full_text_data(lngid, sub_db_id, down_date_str, fulltext, pub_year)
    save_data.append({'table': 'policy_fulltext_latest', 'data': full_text_data})
    result.save_data = save_data

    # Record attachment info (if any) on the originating task row.
    file_info = get_file_info(data, res, f'({fulltext_xpath})')
    di_model_bef = DealUpdateModel()
    if file_info:
        di_model_bef.update.update({"other_dicts": json.dumps(file_info, ensure_ascii=False)})
    else:
        di_model_bef.update.update({"other_dicts": "{}"})
    di_model_bef.where.update({"rawid": callmodel.sql_model.rawid, "task_tag": callmodel.sql_model.task_tag,
                               "task_name": callmodel.sql_model.task_name})
    result.befor_dicts.update_list.append(di_model_bef)
    return result


#   贵州省黔西南布依族苗族自治州
def policy_qxnlist_callback(callmodel: CallBackModel[PolicyListModel]) -> DealModel:
    """List-page callback for Qianxinan (黔西南) prefecture.

    On the first page, fans out insert rows for the remaining list pages;
    then extracts article URL/title/date from each list item and queues
    them for the article stage.
    """
    result = DealModel()
    para_dicts = callmodel.para_dicts
    task_info = callmodel.redis_all.parse_dict["1_1"].task_info
    info_dicts = {"task_name": callmodel.sql_model.task_name,
                  "task_tag": callmodel.sql_model.task_tag,
                  "task_tag_next": task_info.task_tag_next}
    if "1_1" in para_dicts["data"]:
        # Total page count comes from the createPageHTML(...) pager script.
        max_count = re.findall(r'createPageHTML\((\d+)', para_dicts["data"]["1_1"]['html'])
        max_count = int(max_count[0]) if max_count else 1
        total_page = max_count
        page_index = int(callmodel.sql_model.page_index)
        if page_index == 0:
            # Only the first page schedules the remaining list pages.
            sql_dict = callmodel.sql_model.dict()
            di_model_bef = DealInsertModel()
            di_model_bef.insert_pre = CoreSqlValue.insert_ig_it
            sql_dict = deal_sql_dict(sql_dict)
            list_json = json.loads(callmodel.sql_model.list_json)
            for page in range(page_index + 1, total_page):
                sql_dict["page"] = total_page
                sql_dict["page_index"] = page
                dic = {"page_info": f"{list_json['page_info']}_{page}"}
                sql_dict["list_json"] = json.dumps(dic, ensure_ascii=False)
                di_model_bef.lists.append(sql_dict.copy())
            result.befor_dicts.insert.append(di_model_bef)
        di_model_next = DealInsertModel()
        di_model_next.insert_pre = CoreSqlValue.insert_ig_it
        res = Selector(text=para_dicts["data"]["1_1"]['html'])
        li_list = res.xpath('//div[contains(@class,"con")]/ul/li')
        for li in li_list:
            temp = info_dicts.copy()
            temp["task_tag"] = temp["task_tag_next"]
            del temp["task_tag_next"]
            article_json = dict()
            href = li.xpath('a/@href').extract_first()
            if not href:
                # Some <li> entries carry no link; skip them.
                continue
            base_url = 'http://www.qxn.gov.cn'
            # Absolute links are kept as-is; site-relative ones get the host prefix.
            if 'http' in href:
                url = href
            else:
                url = base_url + href
            if 'htm' not in url:
                continue
            # rawid is the file name without its extension.
            rawid = re.findall(r'(.*?)\.', url.split('/')[-1])[0]
            temp["rawid"] = rawid
            temp["sub_db_id"] = '99603'
            article_json["url"] = url
            article_json["title"] = (li.xpath('a/text()').extract_first() or '').strip()
            article_json["pub_date"] = (li.xpath('span/text()').extract_first() or '').strip()
            temp["article_json"] = json.dumps(article_json, ensure_ascii=False)
            di_model_next.lists.append(temp)
        result.next_dicts.insert.append(di_model_next)

    return result


def policy_qxnarticle_callback(callmodel: CallBackModel[PolicyArticleModel]) -> DealModel:
    """Article-fetch callback for Qianxinan: no scheduling needed, return an empty DealModel."""
    return DealModel()


def policy_qxnarticle_etl_callback(callmodel) -> EtlDealModel:
    """ETL for Qianxinan (黔西南) policy articles.

    Parses the fetched article HTML, extracts metadata, emits policy/fulltext
    rows and writes attachment info back onto the task row.

    :param callmodel: callback model carrying the fetched page and task row
    :return: EtlDealModel with save_data and a befor_dicts update
    :raises Exception: when the fulltext container cannot be located
    """
    result = EtlDealModel()
    save_data = list()

    html = callmodel.para_dicts['data']['1_1']['html']
    article_json = json.loads(callmodel.sql_model.article_json)
    provider_url = article_json['url']
    pub_date = clean_pubdate(article_json['pub_date'])
    pub_year = pub_date[:4]
    res = Selector(text=html)

    # Title rendered on the page wins; multi-line titles are taken from the
    # ArticleTitle meta tag instead; last resort is the list-page title.
    title = ''.join(res.xpath('//div[@class="title"]//text()').extract()).strip()
    if '\n' in title:
        title = ''.join(res.xpath('//meta[@name="ArticleTitle"][1]/@content').extract()).strip()
    if not title:
        title = article_json['title'].strip()
    pub_no = ''.join(res.xpath('//div[@class="xxgkml-xl"]//strong[contains(text(),"文") and contains(text(),"号：")]/following::td[1]//text()').extract()).strip()
    index_no = ''.join(res.xpath('//div[@class="xxgkml-xl"]//strong[contains(text(),"索") and contains(text(),"号：")]/following::td[1]//text()').extract()).strip()
    written_date = ''.join(res.xpath('//div[@class="xxgkml-xl"]//strong[contains(text(),"成文日期：")]/following::td[1]//text()').extract()).strip()
    legal_status = re.findall('txt_sfyx.*?var str = "(.*?)"', html, re.S)
    legal_status = '有效' if legal_status and legal_status[0].strip() == '是' else ''
    # Issuing organ lives in a JS variable; try the primary pattern first.
    organ = re.findall('var str = "(.*?)"', html, re.S)
    if not organ:
        organ = re.findall('var str_1 = "(.*?)"', html, re.S)
    organ = organ[0].strip() if organ else ''
    # The site abbreviates the organ as "州…"; expand to the full prefecture name.
    if organ.startswith('州'):
        organ = '贵州省黔西南布依族苗族自治' + organ

    fulltext_xpath = '//div[@id="Zoom"]|//font[@id="Zoom"]'
    fulltext = res.xpath(fulltext_xpath).extract_first()
    if not fulltext:
        raise Exception(f'fulltext not found: {provider_url}')

    down_date_str = time.strftime('%Y%m%d_%H%M%S', time.localtime())
    sub_db_id = '99603'
    rawid = callmodel.sql_model.rawid
    lngid = BaseLngid().GetLngid(sub_db_id, rawid)
    product = "QXN"
    zt_provider = "qxngovpolicy"
    data = init_data(rawid, lngid, sub_db_id, down_date_str, product, zt_provider)
    print(lngid)

    data['title'] = title
    data['provider_url'] = provider_url
    data['pub_date'] = pub_date
    data['pub_year'] = pub_year
    data['pub_no'] = pub_no
    data['organ'] = organ
    data['index_no'] = index_no
    data['written_date'] = clean_pubdate(written_date)
    data['legal_status'] = legal_status

    save_data.append({'table': 'policy_latest', 'data': data})
    full_text_data = init_full_text_data(lngid, sub_db_id, down_date_str, fulltext, pub_year)
    save_data.append({'table': 'policy_fulltext_latest', 'data': full_text_data})
    result.save_data = save_data

    # Record attachment info (if any) on the originating task row.
    file_info = get_file_info(data, res, f'({fulltext_xpath})')
    di_model_bef = DealUpdateModel()
    if file_info:
        di_model_bef.update.update({"other_dicts": json.dumps(file_info, ensure_ascii=False)})
    else:
        di_model_bef.update.update({"other_dicts": "{}"})
    di_model_bef.where.update({"rawid": callmodel.sql_model.rawid, "task_tag": callmodel.sql_model.task_tag,
                               "task_name": callmodel.sql_model.task_name})
    result.befor_dicts.update_list.append(di_model_bef)
    return result


#   西藏自治区发展和改革委员会
def policy_drcxizanglist_callback(callmodel: CallBackModel[PolicyListModel]) -> DealModel:
    """List-page callback for the Xizang Development and Reform Commission.

    On the first page, fans out insert rows for the remaining list pages;
    then extracts article URL/title/date from each list item and queues
    them for the article stage.
    """
    result = DealModel()
    para_dicts = callmodel.para_dicts
    task_info = callmodel.redis_all.parse_dict["1_1"].task_info
    info_dicts = {"task_name": callmodel.sql_model.task_name,
                  "task_tag": callmodel.sql_model.task_tag,
                  "task_tag_next": task_info.task_tag_next}
    if "1_1" in para_dicts["data"]:
        # Total page count comes from the createPageHTML(...) pager script.
        max_count = re.findall(r'createPageHTML\((\d+)', para_dicts["data"]["1_1"]['html'])
        max_count = int(max_count[0]) if max_count else 1
        total_page = max_count
        page_index = int(callmodel.sql_model.page_index)
        if page_index == 0:
            # Only the first page schedules the remaining list pages.
            sql_dict = callmodel.sql_model.dict()
            di_model_bef = DealInsertModel()
            di_model_bef.insert_pre = CoreSqlValue.insert_ig_it
            sql_dict = deal_sql_dict(sql_dict)
            list_json = json.loads(callmodel.sql_model.list_json)
            for page in range(page_index + 1, total_page):
                sql_dict["page"] = total_page
                sql_dict["page_index"] = page
                dic = {"page_info": f"{list_json['page_info']}_{page}"}
                sql_dict["list_json"] = json.dumps(dic, ensure_ascii=False)
                di_model_bef.lists.append(sql_dict.copy())
            result.befor_dicts.insert.append(di_model_bef)
        di_model_next = DealInsertModel()
        di_model_next.insert_pre = CoreSqlValue.insert_ig_it
        res = Selector(text=para_dicts["data"]["1_1"]['html'])
        li_list = res.xpath('//ul[@class="gl-cons-rgs"]/li')
        for li in li_list:
            temp = info_dicts.copy()
            temp["task_tag"] = temp["task_tag_next"]
            del temp["task_tag_next"]
            article_json = dict()
            href = li.xpath('a/@href').extract_first()
            if not href:
                # Some <li> entries carry no link; skip them.
                continue
            base_url = f'http://drc.xizang.gov.cn/{callmodel.sql_model.list_rawid}/index.html'
            url = parse.urljoin(base_url, href)
            if 'htm' not in url:
                continue
            # rawid is the file name without its extension.
            rawid = re.findall(r'(.*?)\.', url.split('/')[-1])[0]
            temp["rawid"] = rawid
            temp["sub_db_id"] = '99604'
            article_json["url"] = url
            article_json["title"] = (li.xpath('a/font/text()|a/text()').extract_first() or '').strip()
            # The site prints two-digit years; prepend the century.
            article_json["pub_date"] = '20' + (li.xpath('span/text()').extract_first() or '').strip()
            temp["article_json"] = json.dumps(article_json, ensure_ascii=False)
            di_model_next.lists.append(temp)
        result.next_dicts.insert.append(di_model_next)

    return result


def policy_drcxizangarticle_callback(callmodel: CallBackModel[PolicyArticleModel]) -> DealModel:
    """Article-fetch callback for Xizang DRC: no scheduling needed, return an empty DealModel."""
    return DealModel()


def policy_drcxizangarticle_etl_callback(callmodel) -> EtlDealModel:
    """ETL for Xizang Development and Reform Commission policy articles.

    Parses the fetched article HTML, extracts title/date, emits
    policy/fulltext rows and writes attachment info back onto the task row.

    :param callmodel: callback model carrying the fetched page and task row
    :return: EtlDealModel with save_data and a befor_dicts update
    :raises Exception: when the fulltext container cannot be located
    """
    result = EtlDealModel()
    save_data = list()

    html = callmodel.para_dicts['data']['1_1']['html']
    article_json = json.loads(callmodel.sql_model.article_json)
    provider_url = article_json['url']
    res = Selector(text=html)

    # Title rendered on the page wins; fall back to the list-page title.
    title = ''.join(res.xpath('//div[@class="xl-atl-conts"]//h3//text()').extract()).strip()
    if not title:
        title = article_json['title'].strip()
    # Publication date from the page byline; fall back to the list-page date.
    pub_date = clean_pubdate(''.join(res.xpath('//p[@class="atl-sub"]/span[1]/text()').extract()).strip())
    if not pub_date:
        pub_date = clean_pubdate(article_json['pub_date'])
    pub_year = pub_date[:4]

    fulltext_xpath = '//div[@class="xl-atl-plists"]'
    fulltext = res.xpath(fulltext_xpath).extract_first()
    if not fulltext:
        raise Exception(f'fulltext not found: {provider_url}')

    down_date_str = time.strftime('%Y%m%d_%H%M%S', time.localtime())
    sub_db_id = '99604'
    rawid = callmodel.sql_model.rawid
    lngid = BaseLngid().GetLngid(sub_db_id, rawid)
    product = "DRCXIZANG"
    zt_provider = "drcxizanggovpolicy"
    data = init_data(rawid, lngid, sub_db_id, down_date_str, product, zt_provider)
    print(lngid)

    data['title'] = title
    data['provider_url'] = provider_url
    data['pub_date'] = pub_date
    data['pub_year'] = pub_year

    save_data.append({'table': 'policy_latest', 'data': data})
    full_text_data = init_full_text_data(lngid, sub_db_id, down_date_str, fulltext, pub_year)
    save_data.append({'table': 'policy_fulltext_latest', 'data': full_text_data})
    result.save_data = save_data

    # Record attachment info (if any) on the originating task row.
    file_info = get_file_info(data, res, f'({fulltext_xpath})')
    di_model_bef = DealUpdateModel()
    if file_info:
        di_model_bef.update.update({"other_dicts": json.dumps(file_info, ensure_ascii=False)})
    else:
        di_model_bef.update.update({"other_dicts": "{}"})
    di_model_bef.where.update({"rawid": callmodel.sql_model.rawid, "task_tag": callmodel.sql_model.task_tag,
                               "task_name": callmodel.sql_model.task_name})
    result.befor_dicts.update_list.append(di_model_bef)
    return result


#   西藏自治区经济和信息化厅
def policy_jxtxizanglist_callback(callmodel: CallBackModel[PolicyListModel]) -> DealModel:
    """List-page callback for the Xizang Department of Economy and IT.

    This site paginates with a pageNum query parameter (pages start at 1).
    On the first page, fans out insert rows for the remaining list pages;
    then extracts article URL/title/date and queues them for the article
    stage.
    """
    result = DealModel()
    para_dicts = callmodel.para_dicts
    task_info = callmodel.redis_all.parse_dict["1_1"].task_info
    info_dicts = {"task_name": callmodel.sql_model.task_name,
                  "task_tag": callmodel.sql_model.task_tag,
                  "task_tag_next": task_info.task_tag_next}
    if "1_1" in para_dicts["data"]:
        # Total page count comes from the "共<strong>N" pager fragment.
        max_count = re.findall(r'共<strong>(\d+)', para_dicts["data"]["1_1"]['html'])
        max_count = int(max_count[0]) if max_count else 1
        total_page = max_count
        page_index = int(callmodel.sql_model.page_index)
        if page_index == 1:
            # Only the first page schedules the remaining list pages.
            sql_dict = callmodel.sql_model.dict()
            di_model_bef = DealInsertModel()
            di_model_bef.insert_pre = CoreSqlValue.insert_ig_it
            sql_dict = deal_sql_dict(sql_dict)
            list_json = json.loads(callmodel.sql_model.list_json)
            for page in range(page_index + 1, total_page + 1):
                sql_dict["page"] = total_page
                sql_dict["page_index"] = page
                # Pagination is carried in the URL, so list_json stays unchanged.
                sql_dict["list_json"] = callmodel.sql_model.list_json
                di_model_bef.lists.append(sql_dict.copy())
            result.befor_dicts.insert.append(di_model_bef)
        di_model_next = DealInsertModel()
        di_model_next.insert_pre = CoreSqlValue.insert_ig_it
        res = Selector(text=para_dicts["data"]["1_1"]['html'])
        li_list = res.xpath('//ul[@class="news_list"]/li')
        for li in li_list:
            temp = info_dicts.copy()
            temp["task_tag"] = temp["task_tag_next"]
            del temp["task_tag_next"]
            article_json = dict()
            href = li.xpath('a/@href').extract_first()
            if not href:
                # Some <li> entries carry no link; skip them.
                continue
            base_url = f'http://jxt.xizang.gov.cn/{callmodel.sql_model.list_rawid}?pageNum=1'
            url = parse.urljoin(base_url, href)
            # rawid is the id query parameter; skip links without one.
            id_match = re.findall(r'\?id=(.*?)&', url)
            if not id_match:
                continue
            temp["rawid"] = id_match[0]
            temp["sub_db_id"] = '99605'
            article_json["url"] = url
            article_json["title"] = (li.xpath('a/span[1]/text()').extract_first() or '').strip()
            article_json["pub_date"] = (li.xpath('a/span[2]/text()').extract_first() or '').strip()
            temp["article_json"] = json.dumps(article_json, ensure_ascii=False)
            di_model_next.lists.append(temp)
        result.next_dicts.insert.append(di_model_next)

    return result


def policy_jxtxizangarticle_callback(callmodel: CallBackModel[PolicyArticleModel]) -> DealModel:
    """Article-fetch callback for Xizang JXT: no scheduling needed, return an empty DealModel."""
    return DealModel()


def policy_jxtxizangarticle_etl_callback(callmodel) -> EtlDealModel:
    """ETL for Xizang Department of Economy and IT policy articles.

    Parses the fetched article HTML, extracts the title, emits
    policy/fulltext rows and writes attachment info back onto the task row.

    :param callmodel: callback model carrying the fetched page and task row
    :return: EtlDealModel with save_data and a befor_dicts update
    :raises Exception: when the fulltext container cannot be located
    """
    result = EtlDealModel()
    save_data = list()

    html = callmodel.para_dicts['data']['1_1']['html']
    article_json = json.loads(callmodel.sql_model.article_json)
    provider_url = article_json['url']
    pub_date = clean_pubdate(article_json['pub_date'])
    pub_year = pub_date[:4]
    res = Selector(text=html)

    # Title rendered on the page wins; fall back to the list-page title.
    title = ''.join(res.xpath('//div[@class="content"]//h1//text()').extract()).strip()
    if not title:
        title = article_json['title'].strip()

    fulltext_xpath = '//div[@class="text"]'
    fulltext = res.xpath(fulltext_xpath).extract_first()
    if not fulltext:
        raise Exception(f'fulltext not found: {provider_url}')

    down_date_str = time.strftime('%Y%m%d_%H%M%S', time.localtime())
    sub_db_id = '99605'
    rawid = callmodel.sql_model.rawid
    lngid = BaseLngid().GetLngid(sub_db_id, rawid)
    product = "JXTXIZANG"
    zt_provider = "jxtxizanggovpolicy"
    data = init_data(rawid, lngid, sub_db_id, down_date_str, product, zt_provider)
    print(lngid)

    data['title'] = title
    data['provider_url'] = provider_url
    data['pub_date'] = pub_date
    data['pub_year'] = pub_year

    save_data.append({'table': 'policy_latest', 'data': data})
    full_text_data = init_full_text_data(lngid, sub_db_id, down_date_str, fulltext, pub_year)
    save_data.append({'table': 'policy_fulltext_latest', 'data': full_text_data})
    result.save_data = save_data

    # Record attachment info (if any) on the originating task row.
    file_info = get_file_info(data, res, f'({fulltext_xpath})')
    di_model_bef = DealUpdateModel()
    if file_info:
        di_model_bef.update.update({"other_dicts": json.dumps(file_info, ensure_ascii=False)})
    else:
        di_model_bef.update.update({"other_dicts": "{}"})
    di_model_bef.where.update({"rawid": callmodel.sql_model.rawid, "task_tag": callmodel.sql_model.task_tag,
                               "task_name": callmodel.sql_model.task_name})
    result.befor_dicts.update_list.append(di_model_bef)
    return result


#   西藏自治区科学技术厅
def policy_stixizanglist_callback(callmodel: CallBackModel[PolicyListModel]) -> DealModel:
    """List-page callback for the Xizang Department of Science and Technology.

    On the first page, fans out insert rows for the remaining list pages;
    then extracts article URL/title/date from each list item and queues
    them for the article stage.
    """
    result = DealModel()
    para_dicts = callmodel.para_dicts
    task_info = callmodel.redis_all.parse_dict["1_1"].task_info
    info_dicts = {"task_name": callmodel.sql_model.task_name,
                  "task_tag": callmodel.sql_model.task_tag,
                  "task_tag_next": task_info.task_tag_next}
    if "1_1" in para_dicts["data"]:
        # Total page count comes from the createPageHTML(...) pager script.
        max_count = re.findall(r'createPageHTML\((\d+)', para_dicts["data"]["1_1"]['html'])
        max_count = int(max_count[0]) if max_count else 1
        total_page = max_count
        page_index = int(callmodel.sql_model.page_index)
        if page_index == 0:
            # Only the first page schedules the remaining list pages.
            sql_dict = callmodel.sql_model.dict()
            di_model_bef = DealInsertModel()
            di_model_bef.insert_pre = CoreSqlValue.insert_ig_it
            sql_dict = deal_sql_dict(sql_dict)
            list_json = json.loads(callmodel.sql_model.list_json)
            for page in range(page_index + 1, total_page):
                sql_dict["page"] = total_page
                sql_dict["page_index"] = page
                dic = {"page_info": f"{list_json['page_info']}_{page}"}
                sql_dict["list_json"] = json.dumps(dic, ensure_ascii=False)
                di_model_bef.lists.append(sql_dict.copy())
            result.befor_dicts.insert.append(di_model_bef)
        di_model_next = DealInsertModel()
        di_model_next.insert_pre = CoreSqlValue.insert_ig_it
        res = Selector(text=para_dicts["data"]["1_1"]['html'])
        li_list = res.xpath('//ul[@class="gl-l"]/li')
        for li in li_list:
            temp = info_dicts.copy()
            temp["task_tag"] = temp["task_tag_next"]
            del temp["task_tag_next"]
            article_json = dict()
            href = li.xpath('a/@href').extract_first()
            if not href:
                # Some <li> entries carry no link; skip them.
                continue
            base_url = f'http://sti.xizang.gov.cn/{callmodel.sql_model.list_rawid}/index.html'
            url = parse.urljoin(base_url, href)
            if 'htm' not in url:
                continue
            # rawid is the file name without its extension.
            rawid = re.findall(r'(.*?)\.', url.split('/')[-1])[0]
            temp["rawid"] = rawid
            temp["sub_db_id"] = '99606'
            article_json["url"] = url
            article_json["title"] = (li.xpath('a/text()').extract_first() or '').strip()
            article_json["pub_date"] = (li.xpath('span/text()').extract_first() or '').strip()
            temp["article_json"] = json.dumps(article_json, ensure_ascii=False)
            di_model_next.lists.append(temp)
        result.next_dicts.insert.append(di_model_next)

    return result


def policy_stixizangarticle_callback(callmodel: CallBackModel[PolicyArticleModel]) -> DealModel:
    """Article-fetch callback for Xizang STI: no scheduling needed, return an empty DealModel."""
    return DealModel()


def policy_stixizangarticle_etl_callback(callmodel) -> EtlDealModel:
    """ETL for Xizang Department of Science and Technology policy articles.

    Parses the fetched article HTML, extracts the title, emits
    policy/fulltext rows and writes attachment info back onto the task row.

    :param callmodel: callback model carrying the fetched page and task row
    :return: EtlDealModel with save_data and a befor_dicts update
    :raises Exception: when the fulltext container cannot be located
    """
    result = EtlDealModel()
    save_data = list()

    html = callmodel.para_dicts['data']['1_1']['html']
    article_json = json.loads(callmodel.sql_model.article_json)
    provider_url = article_json['url']
    pub_date = clean_pubdate(article_json['pub_date'])
    pub_year = pub_date[:4]
    res = Selector(text=html)

    # Title rendered on the page wins; fall back to the list-page title.
    title = ''.join(res.xpath('//p[@class="xl-title"]//text()').extract()).strip()
    if not title:
        title = article_json['title'].strip()

    fulltext_xpath = '//div[@class="xl-articlecont xl-article"]'
    fulltext = res.xpath(fulltext_xpath).extract_first()
    if not fulltext:
        raise Exception(f'fulltext not found: {provider_url}')

    down_date_str = time.strftime('%Y%m%d_%H%M%S', time.localtime())
    sub_db_id = '99606'
    rawid = callmodel.sql_model.rawid
    lngid = BaseLngid().GetLngid(sub_db_id, rawid)
    product = "STIXIZANG"
    zt_provider = "stixizanggovpolicy"
    data = init_data(rawid, lngid, sub_db_id, down_date_str, product, zt_provider)
    print(lngid)

    data['title'] = title
    data['provider_url'] = provider_url
    data['pub_date'] = pub_date
    data['pub_year'] = pub_year

    save_data.append({'table': 'policy_latest', 'data': data})
    full_text_data = init_full_text_data(lngid, sub_db_id, down_date_str, fulltext, pub_year)
    save_data.append({'table': 'policy_fulltext_latest', 'data': full_text_data})
    result.save_data = save_data

    # Record attachment info (if any) on the originating task row.
    file_info = get_file_info(data, res, f'({fulltext_xpath})')
    di_model_bef = DealUpdateModel()
    if file_info:
        di_model_bef.update.update({"other_dicts": json.dumps(file_info, ensure_ascii=False)})
    else:
        di_model_bef.update.update({"other_dicts": "{}"})
    di_model_bef.where.update({"rawid": callmodel.sql_model.rawid, "task_tag": callmodel.sql_model.task_tag,
                               "task_name": callmodel.sql_model.task_name})
    result.befor_dicts.update_list.append(di_model_bef)
    return result


#   西藏自治区教育厅
def policy_eduxizanglist_callback(callmodel: CallBackModel[PolicyListModel]) -> DealModel:
    """List-page callback for the Tibet AR Department of Education policy crawler.

    Parses the list HTML in para_dicts["data"]["1_1"]; on the first page it
    fans out tasks for the remaining list pages, and for every list entry it
    queues one article task carrying url/title/pub_date as article_json.
    """
    result = DealModel()
    para_dicts = callmodel.para_dicts
    task_info = callmodel.redis_all.parse_dict["1_1"].task_info
    info_dicts = {"task_name": callmodel.sql_model.task_name,
                  "task_tag": callmodel.sql_model.task_tag,
                  "task_tag_next": task_info.task_tag_next}
    if "1_1" in para_dicts["data"]:
        # Total page count is rendered as "页数:<current>/<total>" in the HTML.
        max_count = re.findall(r'页数:.*?/(\d+)', para_dicts["data"]["1_1"]['html'])
        total_page = int(max_count[0]) if max_count else 1
        page_index = int(callmodel.sql_model.page_index)
        if page_index == 1:
            # Only the first page (index 1 on this site) fans out the rest.
            sql_dict = deal_sql_dict(callmodel.sql_model.dict())
            di_model_bef = DealInsertModel()
            di_model_bef.insert_pre = CoreSqlValue.insert_ig_it
            for page in range(page_index + 1, total_page + 1):
                sql_dict["page"] = total_page
                sql_dict["page_index"] = page
                dic = {"page_info": f"list{page}"}
                sql_dict["list_json"] = json.dumps(dic, ensure_ascii=False)
                di_model_bef.lists.append(sql_dict.copy())
            result.befor_dicts.insert.append(di_model_bef)
        di_model_next = DealInsertModel()
        di_model_next.insert_pre = CoreSqlValue.insert_ig_it
        res = Selector(text=para_dicts["data"]["1_1"]['html'])
        base_url = f'http://edu.xizang.gov.cn/{callmodel.sql_model.list_rawid}/index.html'
        for li in res.xpath('//div/ul/li'):
            href = li.xpath('a/@href').extract_first()
            if not href:
                # No link in this <li>; urljoin(None) would raise TypeError.
                continue
            url = parse.urljoin(base_url, href)
            if 'htm' not in url:
                continue
            temp = info_dicts.copy()
            temp["task_tag"] = temp["task_tag_next"]
            del temp["task_tag_next"]
            # rawid is the article file name without its extension.
            temp["rawid"] = re.findall(r'(.*?)\.', url.split('/')[-1])[0]
            temp["sub_db_id"] = '99607'
            article_json = {
                "url": url,
                # extract_first() may return None when a node is missing.
                "title": (li.xpath('a/text()').extract_first() or '').strip(),
                "pub_date": (li.xpath('span/text()').extract_first() or '').strip(),
            }
            temp["article_json"] = json.dumps(article_json, ensure_ascii=False)
            di_model_next.lists.append(temp)
        result.next_dicts.insert.append(di_model_next)

    return result


def policy_eduxizangarticle_callback(callmodel: CallBackModel[PolicyArticleModel]) -> DealModel:
    """Article-page callback: no parse-stage work here; extraction is in ETL."""
    return DealModel()


def policy_eduxizangarticle_etl_callback(callmodel) -> EtlDealModel:
    """ETL callback for Tibet AR Department of Education policy articles.

    Extracts title, publish date, and full text from the fetched article
    HTML, queues rows for policy_latest / policy_fulltext_latest, and writes
    attachment info back to the source row's other_dicts column.

    Raises:
        Exception: when the publish date or the full-text node is missing.
    """
    result = EtlDealModel()
    save_data = []

    html = callmodel.para_dicts['data']['1_1']['html']
    article_json = json.loads(callmodel.sql_model.article_json)
    provider_url = article_json['url']
    res = Selector(text=html)

    # Prefer the title on the article page; fall back to the list entry.
    title = ''.join(res.xpath('//div[@class="content-box"]//h2//text()').extract()).strip()
    if not title:
        title = article_json['title'].strip()

    pub_date_info = ''.join(res.xpath('//span[@class="date"]/text()').extract()).strip()
    pub_date = clean_pubdate(pub_date_info)
    if not pub_date:
        raise Exception('pub_date not found: ' + provider_url)
    pub_year = pub_date[:4]

    fulltext_xpath = '//div[@class="content"]'
    fulltext = res.xpath(fulltext_xpath).extract_first()
    if not fulltext:
        raise Exception('fulltext not found: ' + provider_url)

    down_date_str = time.strftime('%Y%m%d_%H%M%S', time.localtime())
    sub_db_id = '99607'
    rawid = callmodel.sql_model.rawid
    lngid = BaseLngid().GetLngid(sub_db_id, rawid)
    data = init_data(rawid, lngid, sub_db_id, down_date_str, "EDUXIZANG", "eduxizanggovpolicy")

    data['title'] = title
    data['provider_url'] = provider_url
    data['pub_date'] = pub_date
    data['pub_year'] = pub_year

    save_data.append({'table': 'policy_latest', 'data': data})
    full_text_data = init_full_text_data(lngid, sub_db_id, down_date_str, fulltext, pub_year)
    save_data.append({'table': 'policy_fulltext_latest', 'data': full_text_data})
    result.save_data = save_data

    # Write attachment info (or an empty JSON object) back to the source row.
    file_info = get_file_info(data, res, f'({fulltext_xpath})')
    di_model_bef = DealUpdateModel()
    di_model_bef.update.update(
        {"other_dicts": json.dumps(file_info, ensure_ascii=False) if file_info else "{}"})
    di_model_bef.where.update({"rawid": callmodel.sql_model.rawid,
                               "task_tag": callmodel.sql_model.task_tag,
                               "task_name": callmodel.sql_model.task_name})
    result.befor_dicts.update_list.append(di_model_bef)
    return result


#   西藏自治区民政厅 (Tibet Autonomous Region Department of Civil Affairs)
def policy_mztxizanglist_callback(callmodel: CallBackModel[PolicyListModel]) -> DealModel:
    """List-page callback for the Tibet AR Department of Civil Affairs policy crawler.

    Parses the list HTML in para_dicts["data"]["1_1"]; on the first page it
    fans out tasks for the remaining list pages, and for every list entry it
    queues one article task carrying url/title/pub_date as article_json.
    """
    result = DealModel()
    para_dicts = callmodel.para_dicts
    task_info = callmodel.redis_all.parse_dict["1_1"].task_info
    info_dicts = {"task_name": callmodel.sql_model.task_name,
                  "task_tag": callmodel.sql_model.task_tag,
                  "task_tag_next": task_info.task_tag_next}
    if "1_1" in para_dicts["data"]:
        # Page count comes from the createPageHTML(<total>, ...) JS call.
        max_count = re.findall(r'createPageHTML\((\d+)', para_dicts["data"]["1_1"]['html'])
        total_page = int(max_count[0]) if max_count else 1
        page_index = int(callmodel.sql_model.page_index)
        if page_index == 0:
            # Only the first page fans out the remaining list-page tasks.
            sql_dict = deal_sql_dict(callmodel.sql_model.dict())
            list_json = json.loads(callmodel.sql_model.list_json)
            di_model_bef = DealInsertModel()
            di_model_bef.insert_pre = CoreSqlValue.insert_ig_it
            for page in range(page_index + 1, total_page):
                sql_dict["page"] = total_page
                sql_dict["page_index"] = page
                dic = {"page_info": f"{list_json['page_info']}_{page}"}
                sql_dict["list_json"] = json.dumps(dic, ensure_ascii=False)
                di_model_bef.lists.append(sql_dict.copy())
            result.befor_dicts.insert.append(di_model_bef)
        di_model_next = DealInsertModel()
        di_model_next.insert_pre = CoreSqlValue.insert_ig_it
        res = Selector(text=para_dicts["data"]["1_1"]['html'])
        base_url = f'http://mzt.xizang.gov.cn/{callmodel.sql_model.list_rawid}/index.html'
        for li in res.xpath('//ul[@class="gl-list"]/li'):
            href = li.xpath('a/@href').extract_first()
            if not href:
                # No link in this <li>; urljoin(None) would raise TypeError.
                continue
            url = parse.urljoin(base_url, href)
            if 'htm' not in url:
                continue
            temp = info_dicts.copy()
            temp["task_tag"] = temp["task_tag_next"]
            del temp["task_tag_next"]
            # rawid is the article file name without its extension.
            temp["rawid"] = re.findall(r'(.*?)\.', url.split('/')[-1])[0]
            temp["sub_db_id"] = '99608'
            article_json = {
                "url": url,
                # extract_first() may return None when a node is missing.
                "title": (li.xpath('a/text()').extract_first() or '').strip(),
                "pub_date": (li.xpath('span/text()').extract_first() or '').strip(),
            }
            temp["article_json"] = json.dumps(article_json, ensure_ascii=False)
            di_model_next.lists.append(temp)
        result.next_dicts.insert.append(di_model_next)

    return result


def policy_mztxizangarticle_callback(callmodel: CallBackModel[PolicyArticleModel]) -> DealModel:
    """Article-page callback: no parse-stage work here; extraction is in ETL."""
    return DealModel()


def policy_mztxizangarticle_etl_callback(callmodel) -> EtlDealModel:
    """ETL callback for Tibet AR Department of Civil Affairs policy articles.

    Builds rows for policy_latest / policy_fulltext_latest from the fetched
    article HTML; attachment info found in the full text is written back to
    the source row's other_dicts column.

    Raises:
        Exception: when the full-text node cannot be located.
    """
    result = EtlDealModel()
    save_data = []

    html = callmodel.para_dicts['data']['1_1']['html']
    article_json = json.loads(callmodel.sql_model.article_json)
    provider_url = article_json['url']
    # Publish date comes from the list entry, not the article page.
    pub_date = clean_pubdate(article_json['pub_date'])
    pub_year = pub_date[:4]
    res = Selector(text=html)

    # Prefer the title on the article page; fall back to the list entry.
    title = ''.join(res.xpath('//p[@class="xl-title"]//text()').extract()).strip()
    if not title:
        title = article_json['title'].strip()

    fulltext_xpath = '//div[@class="xl-article"]'
    fulltext = res.xpath(fulltext_xpath).extract_first()
    if not fulltext:
        raise Exception('fulltext not found: ' + provider_url)

    down_date_str = time.strftime('%Y%m%d_%H%M%S', time.localtime())
    sub_db_id = '99608'
    rawid = callmodel.sql_model.rawid
    lngid = BaseLngid().GetLngid(sub_db_id, rawid)
    data = init_data(rawid, lngid, sub_db_id, down_date_str, "MZTXIZANG", "mztxizanggovpolicy")

    data['title'] = title
    data['provider_url'] = provider_url
    data['pub_date'] = pub_date
    data['pub_year'] = pub_year

    save_data.append({'table': 'policy_latest', 'data': data})
    full_text_data = init_full_text_data(lngid, sub_db_id, down_date_str, fulltext, pub_year)
    save_data.append({'table': 'policy_fulltext_latest', 'data': full_text_data})
    result.save_data = save_data

    # Write attachment info (or an empty JSON object) back to the source row.
    file_info = get_file_info(data, res, f'({fulltext_xpath})')
    di_model_bef = DealUpdateModel()
    di_model_bef.update.update(
        {"other_dicts": json.dumps(file_info, ensure_ascii=False) if file_info else "{}"})
    di_model_bef.where.update({"rawid": callmodel.sql_model.rawid,
                               "task_tag": callmodel.sql_model.task_tag,
                               "task_name": callmodel.sql_model.task_name})
    result.befor_dicts.update_list.append(di_model_bef)
    return result


#   西藏自治区人力资源和社会保障厅 (Tibet Autonomous Region Department of Human Resources and Social Security)
def policy_hrssxizanglist_callback(callmodel: CallBackModel[PolicyListModel]) -> DealModel:
    """List-page callback for the Tibet AR Department of Human Resources and
    Social Security policy crawler.

    Parses the list HTML in para_dicts["data"]["1_1"]; on the first page it
    fans out tasks for the remaining list pages, and for every list entry it
    queues one article task carrying url/title/pub_date as article_json.
    """
    result = DealModel()
    para_dicts = callmodel.para_dicts
    task_info = callmodel.redis_all.parse_dict["1_1"].task_info
    info_dicts = {"task_name": callmodel.sql_model.task_name,
                  "task_tag": callmodel.sql_model.task_tag,
                  "task_tag_next": task_info.task_tag_next}
    if "1_1" in para_dicts["data"]:
        # Page count comes from the createPageHTML(<total>, ...) JS call.
        max_count = re.findall(r'createPageHTML\((\d+)', para_dicts["data"]["1_1"]['html'])
        total_page = int(max_count[0]) if max_count else 1
        page_index = int(callmodel.sql_model.page_index)
        if page_index == 0:
            # Only the first page fans out the remaining list-page tasks.
            sql_dict = deal_sql_dict(callmodel.sql_model.dict())
            list_json = json.loads(callmodel.sql_model.list_json)
            di_model_bef = DealInsertModel()
            di_model_bef.insert_pre = CoreSqlValue.insert_ig_it
            for page in range(page_index + 1, total_page):
                sql_dict["page"] = total_page
                sql_dict["page_index"] = page
                dic = {"page_info": f"{list_json['page_info']}_{page}"}
                sql_dict["list_json"] = json.dumps(dic, ensure_ascii=False)
                di_model_bef.lists.append(sql_dict.copy())
            result.befor_dicts.insert.append(di_model_bef)
        di_model_next = DealInsertModel()
        di_model_next.insert_pre = CoreSqlValue.insert_ig_it
        res = Selector(text=para_dicts["data"]["1_1"]['html'])
        base_url = f'http://hrss.xizang.gov.cn/{callmodel.sql_model.list_rawid}/index.html'
        for li in res.xpath('//ul[@class="gl-cons-rgs"]/li'):
            href = li.xpath('a/@href').extract_first()
            if not href:
                # No link in this <li>; urljoin(None) would raise TypeError.
                continue
            url = parse.urljoin(base_url, href)
            if 'htm' not in url:
                continue
            temp = info_dicts.copy()
            temp["task_tag"] = temp["task_tag_next"]
            del temp["task_tag_next"]
            # rawid is the article file name without its extension.
            temp["rawid"] = re.findall(r'(.*?)\.', url.split('/')[-1])[0]
            temp["sub_db_id"] = '99609'
            article_json = {
                "url": url,
                # extract_first() may return None when a node is missing.
                "title": (li.xpath('a/text()').extract_first() or '').strip(),
                "pub_date": (li.xpath('span/text()').extract_first() or '').strip(),
            }
            temp["article_json"] = json.dumps(article_json, ensure_ascii=False)
            di_model_next.lists.append(temp)
        result.next_dicts.insert.append(di_model_next)

    return result


def policy_hrssxizangarticle_callback(callmodel: CallBackModel[PolicyArticleModel]) -> DealModel:
    """Article-page callback: no parse-stage work here; extraction is in ETL."""
    return DealModel()


def policy_hrssxizangarticle_etl_callback(callmodel) -> EtlDealModel:
    """ETL callback for Tibet AR Department of Human Resources and Social
    Security policy articles.

    Builds rows for policy_latest / policy_fulltext_latest from the fetched
    article HTML; attachment info found in the full text is written back to
    the source row's other_dicts column.

    Raises:
        Exception: when the full-text node cannot be located.
    """
    result = EtlDealModel()
    save_data = []

    html = callmodel.para_dicts['data']['1_1']['html']
    article_json = json.loads(callmodel.sql_model.article_json)
    provider_url = article_json['url']
    # Publish date comes from the list entry, not the article page.
    pub_date = clean_pubdate(article_json['pub_date'])
    pub_year = pub_date[:4]
    res = Selector(text=html)

    # Prefer the title on the article page; fall back to the list entry.
    title = ''.join(res.xpath('//div[@class="xl-atl-conts"]/h3//text()').extract()).strip()
    if not title:
        title = article_json['title'].strip()

    fulltext_xpath = '//div[@class="xl-atl-plists"]'
    fulltext = res.xpath(fulltext_xpath).extract_first()
    if not fulltext:
        raise Exception('fulltext not found: ' + provider_url)

    down_date_str = time.strftime('%Y%m%d_%H%M%S', time.localtime())
    sub_db_id = '99609'
    rawid = callmodel.sql_model.rawid
    lngid = BaseLngid().GetLngid(sub_db_id, rawid)
    data = init_data(rawid, lngid, sub_db_id, down_date_str, "HRSSXIZANG", "hrssxizanggovpolicy")

    data['title'] = title
    data['provider_url'] = provider_url
    data['pub_date'] = pub_date
    data['pub_year'] = pub_year

    save_data.append({'table': 'policy_latest', 'data': data})
    full_text_data = init_full_text_data(lngid, sub_db_id, down_date_str, fulltext, pub_year)
    save_data.append({'table': 'policy_fulltext_latest', 'data': full_text_data})
    result.save_data = save_data

    # Write attachment info (or an empty JSON object) back to the source row.
    file_info = get_file_info(data, res, f'({fulltext_xpath})')
    di_model_bef = DealUpdateModel()
    di_model_bef.update.update(
        {"other_dicts": json.dumps(file_info, ensure_ascii=False) if file_info else "{}"})
    di_model_bef.where.update({"rawid": callmodel.sql_model.rawid,
                               "task_tag": callmodel.sql_model.task_tag,
                               "task_name": callmodel.sql_model.task_name})
    result.befor_dicts.update_list.append(di_model_bef)
    return result


#   西藏自治区农业农村厅 (Tibet Autonomous Region Department of Agriculture and Rural Affairs)
def policy_nynctxizanglist_callback(callmodel: CallBackModel[PolicyListModel]) -> DealModel:
    """List-page callback for the Tibet AR Department of Agriculture and Rural
    Affairs policy crawler.

    Parses the list HTML in para_dicts["data"]["1_1"]; on the first page it
    fans out tasks for the remaining list pages, and for every list entry it
    queues one article task carrying url/title/pub_date as article_json.
    """
    result = DealModel()
    para_dicts = callmodel.para_dicts
    task_info = callmodel.redis_all.parse_dict["1_1"].task_info
    info_dicts = {"task_name": callmodel.sql_model.task_name,
                  "task_tag": callmodel.sql_model.task_tag,
                  "task_tag_next": task_info.task_tag_next}
    if "1_1" in para_dicts["data"]:
        # Page count comes from the createPageHTML(<total>, ...) JS call.
        max_count = re.findall(r'createPageHTML\((\d+)', para_dicts["data"]["1_1"]['html'])
        total_page = int(max_count[0]) if max_count else 1
        page_index = int(callmodel.sql_model.page_index)
        if page_index == 0:
            # Only the first page fans out the remaining list-page tasks.
            sql_dict = deal_sql_dict(callmodel.sql_model.dict())
            list_json = json.loads(callmodel.sql_model.list_json)
            di_model_bef = DealInsertModel()
            di_model_bef.insert_pre = CoreSqlValue.insert_ig_it
            for page in range(page_index + 1, total_page):
                sql_dict["page"] = total_page
                sql_dict["page_index"] = page
                dic = {"page_info": f"{list_json['page_info']}_{page}"}
                sql_dict["list_json"] = json.dumps(dic, ensure_ascii=False)
                di_model_bef.lists.append(sql_dict.copy())
            result.befor_dicts.insert.append(di_model_bef)
        di_model_next = DealInsertModel()
        di_model_next.insert_pre = CoreSqlValue.insert_ig_it
        res = Selector(text=para_dicts["data"]["1_1"]['html'])
        base_url = f'http://nynct.xizang.gov.cn/{callmodel.sql_model.list_rawid}/index.html'
        for li in res.xpath('//div[contains(@class,"nyj-xwit1")]'):
            href = li.xpath('a/@href').extract_first()
            if not href:
                # No link in this entry; urljoin(None) would raise TypeError.
                continue
            url = parse.urljoin(base_url, href)
            if 'htm' not in url:
                continue
            temp = info_dicts.copy()
            temp["task_tag"] = temp["task_tag_next"]
            del temp["task_tag_next"]
            # rawid is the article file name without its extension.
            temp["rawid"] = re.findall(r'(.*?)\.', url.split('/')[-1])[0]
            temp["sub_db_id"] = '99610'
            article_json = {
                "url": url,
                # extract_first() may return None when a node is missing.
                "title": (li.xpath('a/text()').extract_first() or '').strip(),
                "pub_date": (li.xpath('span/text()').extract_first() or '').strip(),
            }
            temp["article_json"] = json.dumps(article_json, ensure_ascii=False)
            di_model_next.lists.append(temp)
        result.next_dicts.insert.append(di_model_next)

    return result


def policy_nynctxizangarticle_callback(callmodel: CallBackModel[PolicyArticleModel]) -> DealModel:
    """Article-page callback: no parse-stage work here; extraction is in ETL."""
    return DealModel()


def policy_nynctxizangarticle_etl_callback(callmodel) -> EtlDealModel:
    """ETL callback for Tibet AR Department of Agriculture and Rural Affairs
    policy articles.

    Builds rows for policy_latest / policy_fulltext_latest from the fetched
    article HTML; attachment info found in the full text is written back to
    the source row's other_dicts column.

    Raises:
        Exception: when the full-text node cannot be located.
    """
    result = EtlDealModel()
    save_data = []

    html = callmodel.para_dicts['data']['1_1']['html']
    article_json = json.loads(callmodel.sql_model.article_json)
    provider_url = article_json['url']
    # Publish date comes from the list entry, not the article page.
    pub_date = clean_pubdate(article_json['pub_date'])
    pub_year = pub_date[:4]
    res = Selector(text=html)

    # Prefer the title on the article page; fall back to the list entry.
    title = ''.join(res.xpath('//h1[@class="nyj-xlt"]//text()').extract()).strip()
    if not title:
        title = article_json['title'].strip()

    fulltext_xpath = '//div[@class="nyj-xlc"]'
    fulltext = res.xpath(fulltext_xpath).extract_first()
    if not fulltext:
        raise Exception('fulltext not found: ' + provider_url)

    down_date_str = time.strftime('%Y%m%d_%H%M%S', time.localtime())
    sub_db_id = '99610'
    rawid = callmodel.sql_model.rawid
    lngid = BaseLngid().GetLngid(sub_db_id, rawid)
    data = init_data(rawid, lngid, sub_db_id, down_date_str, "NYNCTXIZANG", "nynctxizanggovpolicy")

    data['title'] = title
    data['provider_url'] = provider_url
    data['pub_date'] = pub_date
    data['pub_year'] = pub_year

    save_data.append({'table': 'policy_latest', 'data': data})
    full_text_data = init_full_text_data(lngid, sub_db_id, down_date_str, fulltext, pub_year)
    save_data.append({'table': 'policy_fulltext_latest', 'data': full_text_data})
    result.save_data = save_data

    # Write attachment info (or an empty JSON object) back to the source row.
    file_info = get_file_info(data, res, f'({fulltext_xpath})')
    di_model_bef = DealUpdateModel()
    di_model_bef.update.update(
        {"other_dicts": json.dumps(file_info, ensure_ascii=False) if file_info else "{}"})
    di_model_bef.where.update({"rawid": callmodel.sql_model.rawid,
                               "task_tag": callmodel.sql_model.task_tag,
                               "task_name": callmodel.sql_model.task_name})
    result.befor_dicts.update_list.append(di_model_bef)
    return result


#   西藏自治区住房和城乡建设厅 (Tibet Autonomous Region Department of Housing and Urban-Rural Development)
def policy_zjtxizanglist_callback(callmodel: CallBackModel[PolicyListModel]) -> DealModel:
    """List-page callback for the Tibet AR Department of Housing and
    Urban-Rural Development policy crawler.

    Parses the list HTML in para_dicts["data"]["1_1"]; on the first page it
    fans out tasks for the remaining list pages, and for every list entry it
    queues one article task carrying url/title/pub_date as article_json.
    """
    result = DealModel()
    para_dicts = callmodel.para_dicts
    task_info = callmodel.redis_all.parse_dict["1_1"].task_info
    info_dicts = {"task_name": callmodel.sql_model.task_name,
                  "task_tag": callmodel.sql_model.task_tag,
                  "task_tag_next": task_info.task_tag_next}
    if "1_1" in para_dicts["data"]:
        # Page count comes from the createPageHTML(<total>, ...) JS call.
        max_count = re.findall(r'createPageHTML\((\d+)', para_dicts["data"]["1_1"]['html'])
        total_page = int(max_count[0]) if max_count else 1
        page_index = int(callmodel.sql_model.page_index)
        if page_index == 0:
            # Only the first page fans out the remaining list-page tasks.
            sql_dict = deal_sql_dict(callmodel.sql_model.dict())
            list_json = json.loads(callmodel.sql_model.list_json)
            di_model_bef = DealInsertModel()
            di_model_bef.insert_pre = CoreSqlValue.insert_ig_it
            for page in range(page_index + 1, total_page):
                sql_dict["page"] = total_page
                sql_dict["page_index"] = page
                dic = {"page_info": f"{list_json['page_info']}_{page}"}
                sql_dict["list_json"] = json.dumps(dic, ensure_ascii=False)
                di_model_bef.lists.append(sql_dict.copy())
            result.befor_dicts.insert.append(di_model_bef)
        di_model_next = DealInsertModel()
        di_model_next.insert_pre = CoreSqlValue.insert_ig_it
        res = Selector(text=para_dicts["data"]["1_1"]['html'])
        base_url = f'http://zjt.xizang.gov.cn/{callmodel.sql_model.list_rawid}/index.html'
        for li in res.xpath('//ul[@class="u1"]/li'):
            href = li.xpath('a/@href').extract_first()
            if not href:
                # No link in this <li>; urljoin(None) would raise TypeError.
                continue
            url = parse.urljoin(base_url, href)
            if 'htm' not in url:
                continue
            temp = info_dicts.copy()
            temp["task_tag"] = temp["task_tag_next"]
            del temp["task_tag_next"]
            # rawid is the article file name without its extension.
            temp["rawid"] = re.findall(r'(.*?)\.', url.split('/')[-1])[0]
            temp["sub_db_id"] = '99611'
            article_json = {
                "url": url,
                # extract_first() may return None when a node is missing.
                "title": (li.xpath('a/text()').extract_first() or '').strip(),
                "pub_date": (li.xpath('span/text()').extract_first() or '').strip(),
            }
            temp["article_json"] = json.dumps(article_json, ensure_ascii=False)
            di_model_next.lists.append(temp)
        result.next_dicts.insert.append(di_model_next)

    return result


def policy_zjtxizangarticle_callback(callmodel: CallBackModel[PolicyArticleModel]) -> DealModel:
    """Article-page callback: no parse-stage work here; extraction is in ETL."""
    return DealModel()


def policy_zjtxizangarticle_etl_callback(callmodel) -> EtlDealModel:
    """ETL callback for Tibet AR Department of Housing and Urban-Rural
    Development policy articles.

    Builds rows for policy_latest / policy_fulltext_latest from the fetched
    article HTML; attachment info found in the full text is written back to
    the source row's other_dicts column.

    Raises:
        Exception: when the full-text node cannot be located.
    """
    result = EtlDealModel()
    save_data = []

    html = callmodel.para_dicts['data']['1_1']['html']
    article_json = json.loads(callmodel.sql_model.article_json)
    provider_url = article_json['url']
    # Publish date comes from the list entry, not the article page.
    pub_date = clean_pubdate(article_json['pub_date'])
    pub_year = pub_date[:4]
    res = Selector(text=html)

    # Prefer the title on the article page; fall back to the list entry.
    title = ''.join(res.xpath('//h1[@class="xl-title"]//text()').extract()).strip()
    if not title:
        title = article_json['title'].strip()

    fulltext_xpath = '//div[@class="xl-article"]'
    fulltext = res.xpath(fulltext_xpath).extract_first()
    if not fulltext:
        raise Exception('fulltext not found: ' + provider_url)

    down_date_str = time.strftime('%Y%m%d_%H%M%S', time.localtime())
    sub_db_id = '99611'
    rawid = callmodel.sql_model.rawid
    lngid = BaseLngid().GetLngid(sub_db_id, rawid)
    data = init_data(rawid, lngid, sub_db_id, down_date_str, "ZJTXIZANG", "zjtxizanggovpolicy")

    data['title'] = title
    data['provider_url'] = provider_url
    data['pub_date'] = pub_date
    data['pub_year'] = pub_year

    save_data.append({'table': 'policy_latest', 'data': data})
    full_text_data = init_full_text_data(lngid, sub_db_id, down_date_str, fulltext, pub_year)
    save_data.append({'table': 'policy_fulltext_latest', 'data': full_text_data})
    result.save_data = save_data

    # Write attachment info (or an empty JSON object) back to the source row.
    file_info = get_file_info(data, res, f'({fulltext_xpath})')
    di_model_bef = DealUpdateModel()
    di_model_bef.update.update(
        {"other_dicts": json.dumps(file_info, ensure_ascii=False) if file_info else "{}"})
    di_model_bef.where.update({"rawid": callmodel.sql_model.rawid,
                               "task_tag": callmodel.sql_model.task_tag,
                               "task_name": callmodel.sql_model.task_name})
    result.befor_dicts.update_list.append(di_model_bef)
    return result


#   西藏自治区卫生健康委员会 (Tibet Autonomous Region Health Commission)
def policy_wjwxizanglist_callback(callmodel: CallBackModel[PolicyListModel]) -> DealModel:
    """List-page callback for the Tibet AR Health Commission policy crawler.

    Parses the list HTML in para_dicts["data"]["1_1"]; on the first page it
    fans out tasks for the remaining list pages, and for every list entry it
    queues one article task carrying url/title/pub_date as article_json.
    """
    result = DealModel()
    para_dicts = callmodel.para_dicts
    task_info = callmodel.redis_all.parse_dict["1_1"].task_info
    info_dicts = {"task_name": callmodel.sql_model.task_name,
                  "task_tag": callmodel.sql_model.task_tag,
                  "task_tag_next": task_info.task_tag_next}
    if "1_1" in para_dicts["data"]:
        # Page count comes from the createPageHTML(<total>, ...) JS call.
        max_count = re.findall(r'createPageHTML\((\d+)', para_dicts["data"]["1_1"]['html'])
        total_page = int(max_count[0]) if max_count else 1
        page_index = int(callmodel.sql_model.page_index)
        if page_index == 0:
            # Only the first page fans out the remaining list-page tasks.
            sql_dict = deal_sql_dict(callmodel.sql_model.dict())
            list_json = json.loads(callmodel.sql_model.list_json)
            di_model_bef = DealInsertModel()
            di_model_bef.insert_pre = CoreSqlValue.insert_ig_it
            for page in range(page_index + 1, total_page):
                sql_dict["page"] = total_page
                sql_dict["page_index"] = page
                dic = {"page_info": f"{list_json['page_info']}_{page}"}
                sql_dict["list_json"] = json.dumps(dic, ensure_ascii=False)
                di_model_bef.lists.append(sql_dict.copy())
            result.befor_dicts.insert.append(di_model_bef)
        di_model_next = DealInsertModel()
        di_model_next.insert_pre = CoreSqlValue.insert_ig_it
        res = Selector(text=para_dicts["data"]["1_1"]['html'])
        base_url = f'http://wjw.xizang.gov.cn/{callmodel.sql_model.list_rawid}/index.html'
        for li in res.xpath('//ul[@class="wrap2_ul"]/li'):
            href = li.xpath('a/@href').extract_first()
            if not href:
                # No link in this <li>; urljoin(None) would raise TypeError.
                continue
            url = parse.urljoin(base_url, href)
            if 'htm' not in url:
                continue
            temp = info_dicts.copy()
            temp["task_tag"] = temp["task_tag_next"]
            del temp["task_tag_next"]
            # rawid is the article file name without its extension.
            temp["rawid"] = re.findall(r'(.*?)\.', url.split('/')[-1])[0]
            temp["sub_db_id"] = '99612'
            article_json = {
                "url": url,
                # Titles may be wrapped in <font>; dates may sit inside the <a>.
                "title": ''.join(li.xpath('a/font/text()|a/text()').extract()).strip(),
                # extract_first() may return None when a node is missing.
                "pub_date": (li.xpath('a/span/text()|span/text()').extract_first() or '').strip(),
            }
            temp["article_json"] = json.dumps(article_json, ensure_ascii=False)
            di_model_next.lists.append(temp)
        result.next_dicts.insert.append(di_model_next)

    return result


def policy_wjwxizangarticle_callback(callmodel: CallBackModel[PolicyArticleModel]) -> DealModel:
    """Article-page callback: no parse-stage work here; extraction is in ETL."""
    return DealModel()


def policy_wjwxizangarticle_etl_callback(callmodel) -> EtlDealModel:
    """ETL callback for Tibet Health Commission (wjw.xizang.gov.cn) articles.

    Extracts the title and fulltext from the detail-page HTML, assembles
    rows for the `policy_latest` and `policy_fulltext_latest` tables, and
    writes any attachment info back onto the source row via `other_dicts`.
    """
    result = EtlDealModel()
    save_data = list()

    html = callmodel.para_dicts['data']['1_1']['html']
    article_json = json.loads(callmodel.sql_model.article_json)
    provider_url = article_json['url']
    pub_date = clean_pubdate(article_json['pub_date'])
    pub_year = pub_date[:4]
    res = Selector(text=html)

    # Prefer the title rendered on the detail page; fall back to the title
    # captured at list-crawl time.
    title = ''.join(res.xpath('//div[@class="c_title"]//text()').extract()).strip()
    if not title:
        title = article_json['title'].strip()

    fulltext_xpath = '//div[contains(@class,"TRS_UEDITOR")]'
    fulltext = res.xpath(fulltext_xpath).extract_first()
    if not fulltext:
        # Fail with context (a bare `raise Exception` gave no clue which
        # article or xpath failed).
        raise Exception(f"fulltext not found: url={provider_url} xpath={fulltext_xpath}")

    down_date_str = time.strftime('%Y%m%d_%H%M%S', time.localtime())
    sub_db_id = '99612'
    rawid = callmodel.sql_model.rawid
    lngid = BaseLngid().GetLngid(sub_db_id, rawid)
    product = "WJWXIZANG"
    zt_provider = "wjwxizanggovpolicy"
    data = init_data(rawid, lngid, sub_db_id, down_date_str, product, zt_provider)
    print(lngid)

    data['title'] = title
    data['provider_url'] = provider_url
    data['pub_date'] = pub_date
    data['pub_year'] = pub_year
    # Fields such as pub_no/organ/index_no are not set for this site.

    save_data.append({'table': 'policy_latest', 'data': data})
    full_text_data = init_full_text_data(lngid, sub_db_id, down_date_str, fulltext, pub_year)
    save_data.append({'table': 'policy_fulltext_latest', 'data': full_text_data})
    result.save_data = save_data

    # Attachments may appear inside the fulltext body or in the dedicated
    # download box (div.fjxz); collect from both locations.
    file_info1 = get_file_info(data, res, f'({fulltext_xpath})')
    file_info2 = get_file_info(data, res, '(//div[@class="fjxz"])')
    file_info = file_info1 + file_info2
    di_model_bef = DealUpdateModel()
    if file_info:
        di_model_bef.update.update({"other_dicts": json.dumps(file_info, ensure_ascii=False)})
    else:
        di_model_bef.update.update({"other_dicts": "{}"})
    di_model_bef.where.update({"rawid": callmodel.sql_model.rawid, "task_tag": callmodel.sql_model.task_tag,
                               "task_name": callmodel.sql_model.task_name})
    result.befor_dicts.update_list.append(di_model_bef)
    return result


#   西藏自治区拉萨市
def policy_lasalist_callback(callmodel: CallBackModel[PolicyListModel]) -> DealModel:
    """List-page callback for Lhasa municipal government (www.lasa.gov.cn).

    On the first page, reads the total page count from the pager's
    createPageHTML() call and schedules the remaining list pages; then
    extracts every article link on the current page and queues it for the
    article stage.
    """
    result = DealModel()
    para_dicts = callmodel.para_dicts
    task_info = callmodel.redis_all.parse_dict["1_1"].task_info
    info_dicts = {"task_name": callmodel.sql_model.task_name,
                  "task_tag": callmodel.sql_model.task_tag,
                  "task_tag_next": task_info.task_tag_next}
    if "1_1" in para_dicts["data"]:
        # Total page count is emitted by the site's pager JS:
        # createPageHTML('page-div', <total>, ...)
        max_count = re.findall(r"createPageHTML\('page-div',(\d+)", para_dicts["data"]["1_1"]['html'])
        max_count = int(max_count[0]) if max_count else 1
        total_page = max_count
        page_index = int(callmodel.sql_model.page_index)
        if page_index == 1:
            # Only the first page fans out the remaining pages, so the
            # pagination rows are inserted exactly once per list task.
            sql_dict = callmodel.sql_model.dict()
            di_model_bef = DealInsertModel()
            di_model_bef.insert_pre = CoreSqlValue.insert_ig_it
            sql_dict = deal_sql_dict(sql_dict)
            list_json = json.loads(callmodel.sql_model.list_json)
            for page in range(page_index + 1, total_page + 1):
                sql_dict["page"] = total_page
                sql_dict["page_index"] = page
                dic = {"page_info": f"{list_json['page_info']}_{page}"}
                sql_dict["list_json"] = json.dumps(dic, ensure_ascii=False)
                di_model_bef.lists.append(sql_dict.copy())
            result.befor_dicts.insert.append(di_model_bef)
        di_model_next = DealInsertModel()
        di_model_next.insert_pre = CoreSqlValue.insert_ig_it
        res = Selector(text=para_dicts["data"]["1_1"]['html'])
        li_list = res.xpath('//ul[@class="list"]/li')
        for li in li_list:
            temp = info_dicts.copy()
            temp["task_tag"] = temp["task_tag_next"]
            del temp["task_tag_next"]
            article_json = dict()
            href = li.xpath('a/@href').extract_first()
            if not href:  # list item without a link: skip instead of crashing
                continue
            base_url = 'http://www.lasa.gov.cn'
            # Treat the href as absolute only when it starts with the scheme;
            # `'http' in href` also matched relative URLs containing "http".
            if href.startswith('http'):
                url = href
            else:
                url = base_url + href
            if 'htm' not in url:
                continue
            rawid = re.findall(r'(.*?)\.', url.split('/')[-1])[0]
            temp["rawid"] = rawid
            temp["sub_db_id"] = '99613'
            article_json["url"] = url
            article_json["title"] = (li.xpath('a/@title').extract_first() or '').strip()
            article_json["pub_date"] = (li.xpath('div//span[@class="time"]/text()').extract_first() or '').strip()
            temp["article_json"] = json.dumps(article_json, ensure_ascii=False)
            di_model_next.lists.append(temp)
        result.next_dicts.insert.append(di_model_next)

    return result


def policy_lasalist1_callback(callmodel: CallBackModel[PolicyListModel]) -> DealModel:
    """List callback for the Lhasa site's JSON search API.

    The response body is JSON (`totalPage` + `results`); page fan-out and
    per-article queueing mirror policy_lasalist_callback, but pagination is
    driven purely by page_index (list_json is reused unchanged).
    """
    result = DealModel()
    para_dicts = callmodel.para_dicts
    task_info = callmodel.redis_all.parse_dict["1_1"].task_info
    info_dicts = {"task_name": callmodel.sql_model.task_name,
                  "task_tag": callmodel.sql_model.task_tag,
                  "task_tag_next": task_info.task_tag_next}
    if "1_1" in para_dicts["data"]:
        html_json = json.loads(para_dicts["data"]["1_1"]['html'])
        total_page = html_json['totalPage']
        page_index = int(callmodel.sql_model.page_index)
        if page_index == 1:
            sql_dict = callmodel.sql_model.dict()
            di_model_bef = DealInsertModel()
            di_model_bef.insert_pre = CoreSqlValue.insert_ig_it
            sql_dict = deal_sql_dict(sql_dict)
            # Start at page_index + 1: the current page is already being
            # processed, so re-inserting it (as range(1, ...) did) was
            # redundant even under INSERT IGNORE.
            for page in range(page_index + 1, total_page + 1):
                sql_dict["page"] = total_page
                sql_dict["page_index"] = page
                # The API is paged via page_index; list_json stays as-is.
                sql_dict["list_json"] = callmodel.sql_model.list_json
                di_model_bef.lists.append(sql_dict.copy())
            result.befor_dicts.insert.append(di_model_bef)
        di_model_next = DealInsertModel()
        di_model_next.insert_pre = CoreSqlValue.insert_ig_it

        li_list = html_json['results']
        for li in li_list:
            temp = info_dicts.copy()
            temp["task_tag"] = temp["task_tag_next"]
            del temp["task_tag_next"]
            article_json = dict()
            href = li['url']
            if not href:  # result without a URL: skip instead of crashing
                continue
            base_url = 'http://www.lasa.gov.cn'
            # Treat the href as absolute only when it starts with the scheme.
            if href.startswith('http'):
                url = href
            else:
                url = base_url + href
            if 'htm' not in url:
                continue
            rawid = re.findall(r'(.*?)\.', url.split('/')[-1])[0]
            temp["rawid"] = rawid
            temp["sub_db_id"] = '99613'
            article_json["url"] = url
            article_json["title"] = li['title']
            article_json["pub_date"] = li['publishedTime']
            temp["article_json"] = json.dumps(article_json, ensure_ascii=False)
            di_model_next.lists.append(temp)
        result.next_dicts.insert.append(di_model_next)

    return result


def policy_lasaarticle_callback(callmodel: CallBackModel[PolicyArticleModel]) -> DealModel:
    """Article-stage callback for www.lasa.gov.cn.

    No follow-up scheduling is required at this stage, so an empty
    DealModel is returned as-is.
    """
    return DealModel()


def policy_lasaarticle_etl_callback(callmodel) -> EtlDealModel:
    """ETL callback for Lhasa municipal government (www.lasa.gov.cn) articles.

    Extracts title, document number, index number, subject, written date and
    issuing organ from the detail HTML, builds rows for `policy_latest` /
    `policy_fulltext_latest`, and stores attachment info back onto the
    source row via the `other_dicts` column.
    """
    result = EtlDealModel()
    save_data = list()

    html = callmodel.para_dicts['data']['1_1']['html']
    article_json = json.loads(callmodel.sql_model.article_json)
    provider_url = article_json['url']
    pub_date = clean_pubdate(article_json['pub_date'])
    pub_year = pub_date[:4]
    res = Selector(text=html)

    # Prefer the detail-page title; fall back to the list-page title.
    title = ''.join(res.xpath('//div[@class="zfxxgk_content"]/h2//text()|//div[@class="detail"]/h1//text()').extract()).strip()
    if not title:
        title = article_json['title'].strip()
    # The document number is embedded in inline JS under one of several
    # variable names; try them in order until one matches.
    pub_no = re.findall('var wz = "(.*?)"', html)
    if not pub_no:
        pub_no = re.findall('var fwzh1 = "(.*?)"', html)
    if not pub_no:
        pub_no = re.findall('var fwzh2 = "(.*?)"', html)
    pub_no = pub_no[0].strip() if pub_no else ''
    index_no = ''.join(res.xpath('//div[@class="syhbox"]//li[contains(text(),"索引号：")]//text()').extract()).strip()
    index_no = index_no.split('：')[-1].strip()
    subject = ''.join(res.xpath('//div[@class="syhbox"]//li[contains(text(),"主题分类：")]//text()').extract()).strip()
    subject = subject.split('：')[-1].strip()
    # The written date span carries an epoch timestamp in milliseconds.
    date_str = ''.join(res.xpath('//span[@class="cwrq"]/text()').extract()).strip()
    if date_str:
        date_stamp = datetime.datetime.fromtimestamp(int(date_str) / 1000)
        written_date = datetime.datetime.strftime(date_stamp, "%Y-%m-%d %H:%M:%S")
    else:
        written_date = ''
    organ = ''.join(res.xpath('//div[@class="syhbox"]//li[contains(text(),"发文机关：")]//text()').extract()).strip()
    organ = organ.split('：')[-1].strip()
    # Expand abbreviated organ names (e.g. "市人民政府") with the city name.
    if organ.startswith('市'):
        organ = '拉萨' + organ

    fulltext_xpath = '//div[@id="NewsContent"]|//div[@class="gknbxq_detail"]'
    fulltext = res.xpath(fulltext_xpath).extract_first()
    if not fulltext:
        # Fail with context (a bare `raise Exception` gave no clue which
        # article or xpath failed).
        raise Exception(f"fulltext not found: url={provider_url} xpath={fulltext_xpath}")

    down_date_str = time.strftime('%Y%m%d_%H%M%S', time.localtime())
    sub_db_id = '99613'
    rawid = callmodel.sql_model.rawid
    lngid = BaseLngid().GetLngid(sub_db_id, rawid)
    product = "LASA"
    zt_provider = "lasagovpolicy"
    data = init_data(rawid, lngid, sub_db_id, down_date_str, product, zt_provider)
    print(lngid)

    data['title'] = title
    data['provider_url'] = provider_url
    data['pub_date'] = pub_date
    data['pub_year'] = pub_year
    data['pub_no'] = pub_no
    data['organ'] = organ
    data['index_no'] = index_no
    data['written_date'] = clean_pubdate(written_date)
    data['subject'] = subject

    save_data.append({'table': 'policy_latest', 'data': data})
    full_text_data = init_full_text_data(lngid, sub_db_id, down_date_str, fulltext, pub_year)
    save_data.append({'table': 'policy_fulltext_latest', 'data': full_text_data})
    result.save_data = save_data

    file_info = get_file_info(data, res, f'({fulltext_xpath})')
    di_model_bef = DealUpdateModel()
    if file_info:
        di_model_bef.update.update({"other_dicts": json.dumps(file_info, ensure_ascii=False)})
    else:
        di_model_bef.update.update({"other_dicts": "{}"})
    di_model_bef.where.update({"rawid": callmodel.sql_model.rawid, "task_tag": callmodel.sql_model.task_tag,
                               "task_name": callmodel.sql_model.task_name})
    result.befor_dicts.update_list.append(di_model_bef)
    return result


#   西藏自治区日喀则市
def policy_rikazelist_callback(callmodel: CallBackModel[PolicyListModel]) -> DealModel:
    """List-page callback for Shigatse municipal government (www.rikaze.gov.cn).

    On the first page, derives the total page count from the pager's
    parseInt('<n>') calls and schedules the remaining list pages; then
    queues every article link on the current page for the article stage.
    """
    result = DealModel()
    para_dicts = callmodel.para_dicts
    task_info = callmodel.redis_all.parse_dict["1_1"].task_info
    info_dicts = {"task_name": callmodel.sql_model.task_name,
                  "task_tag": callmodel.sql_model.task_tag,
                  "task_tag_next": task_info.task_tag_next}
    if "1_1" in para_dicts["data"]:
        max_count = re.findall(r"parseInt\('(\d+)", para_dicts["data"]["1_1"]['html'])
        # The second-to-last parseInt('<n>') holds the total page count.
        # Require at least two matches: the previous `if max_count` guard
        # still raised IndexError on [-2] when exactly one match existed.
        max_count = int(max_count[-2]) if len(max_count) >= 2 else 1
        total_page = max_count
        page_index = int(callmodel.sql_model.page_index)
        if page_index == 1:
            sql_dict = callmodel.sql_model.dict()
            di_model_bef = DealInsertModel()
            di_model_bef.insert_pre = CoreSqlValue.insert_ig_it
            sql_dict = deal_sql_dict(sql_dict)
            for page in range(page_index + 1, total_page + 1):
                sql_dict["page"] = total_page
                sql_dict["page_index"] = page
                # This site is paged via page_index; list_json stays as-is.
                sql_dict["list_json"] = callmodel.sql_model.list_json
                di_model_bef.lists.append(sql_dict.copy())
            result.befor_dicts.insert.append(di_model_bef)
        di_model_next = DealInsertModel()
        di_model_next.insert_pre = CoreSqlValue.insert_ig_it
        res = Selector(text=para_dicts["data"]["1_1"]['html'])
        li_list = res.xpath('//div[@class="wrapper-content"]/ul/li|//div[@class="lists"]/ul/li')
        for li in li_list:
            temp = info_dicts.copy()
            temp["task_tag"] = temp["task_tag_next"]
            del temp["task_tag_next"]
            article_json = dict()
            href = li.xpath('a/@href').extract_first()
            if not href:  # list item without a link: skip instead of crashing
                continue
            base_url = 'http://www.rikaze.gov.cn'
            # Treat the href as absolute only when it starts with the scheme;
            # `'http' in href` also matched relative URLs containing "http".
            if href.startswith('http'):
                url = href
            else:
                url = base_url + href
            # Article pages are addressed by a cid query parameter.
            rawid = url.split('cid=')[-1]
            temp["rawid"] = rawid
            temp["sub_db_id"] = '99614'
            article_json["url"] = url
            article_json["title"] = (li.xpath('a/p/text()').extract_first() or '').strip()
            article_json["pub_date"] = (li.xpath('a/span/text()').extract_first() or '').strip()
            temp["article_json"] = json.dumps(article_json, ensure_ascii=False)
            di_model_next.lists.append(temp)
        result.next_dicts.insert.append(di_model_next)

    return result


def policy_rikazearticle_callback(callmodel: CallBackModel[PolicyArticleModel]) -> DealModel:
    """Article-stage callback for www.rikaze.gov.cn.

    No follow-up scheduling is required at this stage, so an empty
    DealModel is returned as-is.
    """
    return DealModel()


def policy_rikazearticle_etl_callback(callmodel) -> EtlDealModel:
    """ETL callback for Shigatse municipal government (www.rikaze.gov.cn) articles.

    Extracts title, document number, index number, written date and issuing
    organ from the detail HTML, builds rows for `policy_latest` /
    `policy_fulltext_latest`, and stores attachment info back onto the
    source row via the `other_dicts` column.
    """
    result = EtlDealModel()
    save_data = list()

    html = callmodel.para_dicts['data']['1_1']['html']
    article_json = json.loads(callmodel.sql_model.article_json)
    provider_url = article_json['url']
    pub_date = clean_pubdate(article_json['pub_date'])
    pub_year = pub_date[:4]
    res = Selector(text=html)

    # Prefer the detail-page title; fall back to the list-page title.
    title = ''.join(res.xpath('//div[@class="report-title"]//h3//text()|//div[@class="title-main"]//h2//text()').extract()).strip()
    if not title:
        title = article_json['title'].strip()
    # NOTE(review): the site mixes full-width and half-width colons in these
    # labels; each xpath/split pair below matches the variant actually used.
    pub_no = ''.join(res.xpath('//div[contains(@class,"report-info")]//p[contains(text(),"文号：")]//text()').extract()).strip()
    pub_no = pub_no.split('：')[-1].strip()
    index_no = ''.join(res.xpath('//div[contains(@class,"report-info")]//p[contains(text(),"索引号:")]//text()').extract()).strip()
    index_no = index_no.split(':')[-1].strip()
    written_date = ''.join(res.xpath('//div[contains(@class,"report-info")]//p[contains(text(),"成文日期:")]//text()').extract()).strip()
    written_date = written_date.split(':')[-1].strip()
    organ = ''.join(res.xpath('//div[contains(@class,"report-info")]//p[contains(text(),"发布机构:")]//text()').extract()).strip()
    organ = organ.split(':')[-1].strip()
    # Expand abbreviated organ names (e.g. "市人民政府") with the city name.
    if organ.startswith('市'):
        organ = '日喀则' + organ

    fulltext_xpath = '//div[@class="report-content"]|//div[@class="news-content"]'
    fulltext = res.xpath(fulltext_xpath).extract_first()
    if not fulltext:
        # Fail with context (a bare `raise Exception` gave no clue which
        # article or xpath failed).
        raise Exception(f"fulltext not found: url={provider_url} xpath={fulltext_xpath}")

    down_date_str = time.strftime('%Y%m%d_%H%M%S', time.localtime())
    sub_db_id = '99614'
    rawid = callmodel.sql_model.rawid
    lngid = BaseLngid().GetLngid(sub_db_id, rawid)
    product = "RIKAZE"
    zt_provider = "rikazegovpolicy"
    data = init_data(rawid, lngid, sub_db_id, down_date_str, product, zt_provider)
    print(lngid)

    data['title'] = title
    data['provider_url'] = provider_url
    data['pub_date'] = pub_date
    data['pub_year'] = pub_year
    data['pub_no'] = pub_no
    data['organ'] = organ
    data['index_no'] = index_no
    data['written_date'] = clean_pubdate(written_date)

    save_data.append({'table': 'policy_latest', 'data': data})
    full_text_data = init_full_text_data(lngid, sub_db_id, down_date_str, fulltext, pub_year)
    save_data.append({'table': 'policy_fulltext_latest', 'data': full_text_data})
    result.save_data = save_data

    file_info = get_file_info(data, res, f'({fulltext_xpath})')
    di_model_bef = DealUpdateModel()
    if file_info:
        di_model_bef.update.update({"other_dicts": json.dumps(file_info, ensure_ascii=False)})
    else:
        di_model_bef.update.update({"other_dicts": "{}"})
    di_model_bef.where.update({"rawid": callmodel.sql_model.rawid, "task_tag": callmodel.sql_model.task_tag,
                               "task_name": callmodel.sql_model.task_name})
    result.befor_dicts.update_list.append(di_model_bef)
    return result


#   西藏自治区昌都市
def policy_changdulist_callback(callmodel: CallBackModel[PolicyListModel]) -> DealModel:
    """List-page callback for Chamdo municipal government (www.changdu.gov.cn).

    On the first page, reads the total page count from the pager's
    createPageHTML() call and schedules the remaining list pages; then
    queues every article link on the current page for the article stage.
    """
    result = DealModel()
    para_dicts = callmodel.para_dicts
    task_info = callmodel.redis_all.parse_dict["1_1"].task_info
    info_dicts = {"task_name": callmodel.sql_model.task_name,
                  "task_tag": callmodel.sql_model.task_tag,
                  "task_tag_next": task_info.task_tag_next}
    if "1_1" in para_dicts["data"]:
        # Total page count is emitted by the site's pager JS:
        # createPageHTML('page-div', <total>, ...)
        max_count = re.findall(r"createPageHTML\('page-div',(\d+)", para_dicts["data"]["1_1"]['html'])
        max_count = int(max_count[0]) if max_count else 1
        total_page = max_count
        page_index = int(callmodel.sql_model.page_index)
        if page_index == 1:
            sql_dict = callmodel.sql_model.dict()
            di_model_bef = DealInsertModel()
            di_model_bef.insert_pre = CoreSqlValue.insert_ig_it
            sql_dict = deal_sql_dict(sql_dict)
            list_json = json.loads(callmodel.sql_model.list_json)
            for page in range(page_index + 1, total_page + 1):
                sql_dict["page"] = total_page
                sql_dict["page_index"] = page
                dic = {"page_info": f"{list_json['page_info']}_{page}"}
                sql_dict["list_json"] = json.dumps(dic, ensure_ascii=False)
                di_model_bef.lists.append(sql_dict.copy())
            result.befor_dicts.insert.append(di_model_bef)
        di_model_next = DealInsertModel()
        di_model_next.insert_pre = CoreSqlValue.insert_ig_it
        res = Selector(text=para_dicts["data"]["1_1"]['html'])
        li_list = res.xpath('//div[@class="gknr_list"]/dl/dd')
        for li in li_list:
            temp = info_dicts.copy()
            temp["task_tag"] = temp["task_tag_next"]
            del temp["task_tag_next"]
            article_json = dict()
            href = li.xpath('a/@href').extract_first()
            if not href:  # list item without a link: skip instead of crashing
                continue
            base_url = 'http://www.changdu.gov.cn'
            # Treat the href as absolute only when it starts with the scheme;
            # `'http' in href` also matched relative URLs containing "http".
            if href.startswith('http'):
                url = href
            else:
                url = base_url + href
            if 'htm' not in url:
                continue
            rawid = re.findall(r'(.*?)\.', url.split('/')[-1])[0]
            temp["rawid"] = rawid
            temp["sub_db_id"] = '99615'
            article_json["url"] = url
            article_json["title"] = (li.xpath('a/text()').extract_first() or '').strip()
            article_json["pub_date"] = (li.xpath('span/text()').extract_first() or '').strip()
            temp["article_json"] = json.dumps(article_json, ensure_ascii=False)
            di_model_next.lists.append(temp)
        result.next_dicts.insert.append(di_model_next)

    return result


def policy_changduarticle_callback(callmodel: CallBackModel[PolicyArticleModel]) -> DealModel:
    """Article-stage callback for www.changdu.gov.cn.

    No follow-up scheduling is required at this stage, so an empty
    DealModel is returned as-is.
    """
    return DealModel()


def policy_changduarticle_etl_callback(callmodel) -> EtlDealModel:
    """ETL callback for Chamdo municipal government (www.changdu.gov.cn) articles.

    Extracts the title and fulltext from the detail HTML, builds rows for
    `policy_latest` / `policy_fulltext_latest`, and stores attachment info
    back onto the source row via the `other_dicts` column.
    """
    result = EtlDealModel()
    save_data = list()

    html = callmodel.para_dicts['data']['1_1']['html']
    article_json = json.loads(callmodel.sql_model.article_json)
    provider_url = article_json['url']
    pub_date = clean_pubdate(article_json['pub_date'])
    pub_year = pub_date[:4]
    res = Selector(text=html)

    # Prefer the detail-page title; fall back to the list-page title.
    title = ''.join(res.xpath('//div[@class="gknbxq_top"]//h2//text()').extract()).strip()
    if not title:
        title = article_json['title'].strip()

    fulltext_xpath = '//div[@class="gknbxq_detail"]'
    fulltext = res.xpath(fulltext_xpath).extract_first()
    if not fulltext:
        # Fail with context (a bare `raise Exception` gave no clue which
        # article or xpath failed).
        raise Exception(f"fulltext not found: url={provider_url} xpath={fulltext_xpath}")

    down_date_str = time.strftime('%Y%m%d_%H%M%S', time.localtime())
    sub_db_id = '99615'
    rawid = callmodel.sql_model.rawid
    lngid = BaseLngid().GetLngid(sub_db_id, rawid)
    product = "CHANGDU"
    zt_provider = "changdugovpolicy"
    data = init_data(rawid, lngid, sub_db_id, down_date_str, product, zt_provider)
    print(lngid)

    data['title'] = title
    data['provider_url'] = provider_url
    data['pub_date'] = pub_date
    data['pub_year'] = pub_year
    # Fields such as pub_no/organ/index_no are not set for this site.

    save_data.append({'table': 'policy_latest', 'data': data})
    full_text_data = init_full_text_data(lngid, sub_db_id, down_date_str, fulltext, pub_year)
    save_data.append({'table': 'policy_fulltext_latest', 'data': full_text_data})
    result.save_data = save_data

    file_info = get_file_info(data, res, f'({fulltext_xpath})')
    di_model_bef = DealUpdateModel()
    if file_info:
        di_model_bef.update.update({"other_dicts": json.dumps(file_info, ensure_ascii=False)})
    else:
        di_model_bef.update.update({"other_dicts": "{}"})
    di_model_bef.where.update({"rawid": callmodel.sql_model.rawid, "task_tag": callmodel.sql_model.task_tag,
                               "task_name": callmodel.sql_model.task_name})
    result.befor_dicts.update_list.append(di_model_bef)
    return result


#   西藏自治区林芝市
def policy_linzhilist_callback(callmodel: CallBackModel[PolicyListModel]) -> DealModel:
    """List-page callback for Nyingchi municipal government (www.linzhi.gov.cn).

    On the first page, reads the total page count from the pager's
    createPageHTML() call and schedules the remaining list pages; then
    queues every article link on the current page for the article stage.
    """
    result = DealModel()
    para_dicts = callmodel.para_dicts
    task_info = callmodel.redis_all.parse_dict["1_1"].task_info
    info_dicts = {"task_name": callmodel.sql_model.task_name,
                  "task_tag": callmodel.sql_model.task_tag,
                  "task_tag_next": task_info.task_tag_next}
    if "1_1" in para_dicts["data"]:
        # Total page count is emitted by the site's pager JS:
        # createPageHTML('page-div', <total>, ...)
        max_count = re.findall(r"createPageHTML\('page-div',(\d+)", para_dicts["data"]["1_1"]['html'])
        max_count = int(max_count[0]) if max_count else 1
        total_page = max_count
        page_index = int(callmodel.sql_model.page_index)
        if page_index == 1:
            sql_dict = callmodel.sql_model.dict()
            di_model_bef = DealInsertModel()
            di_model_bef.insert_pre = CoreSqlValue.insert_ig_it
            sql_dict = deal_sql_dict(sql_dict)
            list_json = json.loads(callmodel.sql_model.list_json)
            for page in range(page_index + 1, total_page + 1):
                sql_dict["page"] = total_page
                sql_dict["page_index"] = page
                dic = {"page_info": f"{list_json['page_info']}_{page}"}
                sql_dict["list_json"] = json.dumps(dic, ensure_ascii=False)
                di_model_bef.lists.append(sql_dict.copy())
            result.befor_dicts.insert.append(di_model_bef)
        di_model_next = DealInsertModel()
        di_model_next.insert_pre = CoreSqlValue.insert_ig_it
        res = Selector(text=para_dicts["data"]["1_1"]['html'])
        li_list = res.xpath('//ul[@class="list-li"]/li|//ul[@class="zfxxgk-nr-cnet"]/li')
        for li in li_list:
            temp = info_dicts.copy()
            temp["task_tag"] = temp["task_tag_next"]
            del temp["task_tag_next"]
            article_json = dict()
            href = li.xpath('a/@href').extract_first()
            if not href:  # list item without a link: skip instead of crashing
                continue
            base_url = 'http://www.linzhi.gov.cn'
            # Treat the href as absolute only when it starts with the scheme;
            # `'http' in href` also matched relative URLs containing "http".
            if href.startswith('http'):
                url = href
            else:
                url = base_url + href
            if 'htm' not in url:
                continue
            rawid = re.findall(r'(.*?)\.', url.split('/')[-1])[0]
            temp["rawid"] = rawid
            temp["sub_db_id"] = '99616'
            article_json["url"] = url
            article_json["title"] = (li.xpath('a/p/text()|a/text()').extract_first() or '').strip()
            article_json["pub_date"] = (li.xpath('a/span/text()|span/text()').extract_first() or '').strip()
            temp["article_json"] = json.dumps(article_json, ensure_ascii=False)
            di_model_next.lists.append(temp)
        result.next_dicts.insert.append(di_model_next)

    return result


def policy_linzhiarticle_callback(callmodel: CallBackModel[PolicyArticleModel]) -> DealModel:
    """Article-stage callback for www.linzhi.gov.cn.

    No follow-up scheduling is required at this stage, so an empty
    DealModel is returned as-is.
    """
    return DealModel()


def policy_linzhiarticle_etl_callback(callmodel) -> EtlDealModel:
    """ETL callback for Nyingchi municipal government (www.linzhi.gov.cn) articles.

    Extracts the title and fulltext from the detail HTML, builds rows for
    `policy_latest` / `policy_fulltext_latest`, and stores attachment info
    back onto the source row via the `other_dicts` column.
    """
    result = EtlDealModel()
    save_data = list()

    html = callmodel.para_dicts['data']['1_1']['html']
    article_json = json.loads(callmodel.sql_model.article_json)
    provider_url = article_json['url']
    pub_date = clean_pubdate(article_json['pub_date'])
    pub_year = pub_date[:4]
    res = Selector(text=html)

    # Prefer the detail-page title; fall back to the list-page title.
    title = ''.join(res.xpath('//div[@class="d_top"]//h2//text()|//div[@id="cnet-top"]//h4//text()').extract()).strip()
    if not title:
        title = article_json['title'].strip()

    fulltext_xpath = '//div[@id="cnet-warp"]|//div[@class="d_center"]'
    fulltext = res.xpath(fulltext_xpath).extract_first()
    if not fulltext:
        # Fail with context (a bare `raise Exception` gave no clue which
        # article or xpath failed).
        raise Exception(f"fulltext not found: url={provider_url} xpath={fulltext_xpath}")

    down_date_str = time.strftime('%Y%m%d_%H%M%S', time.localtime())
    sub_db_id = '99616'
    rawid = callmodel.sql_model.rawid
    lngid = BaseLngid().GetLngid(sub_db_id, rawid)
    product = "LINZHI"
    zt_provider = "linzhigovpolicy"
    data = init_data(rawid, lngid, sub_db_id, down_date_str, product, zt_provider)
    print(lngid)

    data['title'] = title
    data['provider_url'] = provider_url
    data['pub_date'] = pub_date
    data['pub_year'] = pub_year
    # Fields such as pub_no/organ/index_no are not set for this site.

    save_data.append({'table': 'policy_latest', 'data': data})
    full_text_data = init_full_text_data(lngid, sub_db_id, down_date_str, fulltext, pub_year)
    save_data.append({'table': 'policy_fulltext_latest', 'data': full_text_data})
    result.save_data = save_data

    file_info = get_file_info(data, res, f'({fulltext_xpath})')
    di_model_bef = DealUpdateModel()
    if file_info:
        di_model_bef.update.update({"other_dicts": json.dumps(file_info, ensure_ascii=False)})
    else:
        di_model_bef.update.update({"other_dicts": "{}"})
    di_model_bef.where.update({"rawid": callmodel.sql_model.rawid, "task_tag": callmodel.sql_model.task_tag,
                               "task_name": callmodel.sql_model.task_name})
    result.befor_dicts.update_list.append(di_model_bef)
    return result


#   Shannan City, Tibet Autonomous Region (西藏自治区山南市)
def policy_shannanlist_callback(callmodel: CallBackModel[PolicyListModel]) -> DealModel:
    """List-page callback for the Shannan government policy site.

    Reads the total page count from the ``createPageHTML(N`` pager call in
    the fetched HTML. On the first page (page_index == 0) it schedules the
    remaining list pages; on every call it extracts the article links and
    queues them for the article stage.

    :param callmodel: platform callback model carrying the fetched HTML
        and the originating SQL row.
    :return: DealModel with follow-up list pages in ``befor_dicts`` and
        article task rows in ``next_dicts``.
    """
    result = DealModel()
    para_dicts = callmodel.para_dicts
    task_info = callmodel.redis_all.parse_dict["1_1"].task_info
    info_dicts = {"task_name": callmodel.sql_model.task_name,
                  "task_tag": callmodel.sql_model.task_tag,
                  "task_tag_next": task_info.task_tag_next}
    if "1_1" in para_dicts["data"]:
        # Total page count is embedded in the JS pager, e.g. createPageHTML(12, ...)
        max_count = re.findall(r"createPageHTML\((\d+)", para_dicts["data"]["1_1"]['html'])
        total_page = int(max_count[0]) if max_count else 1
        page_index = int(callmodel.sql_model.page_index)
        if page_index == 0:
            # Only the first page schedules the remaining list pages.
            sql_dict = callmodel.sql_model.dict()
            di_model_bef = DealInsertModel()
            di_model_bef.insert_pre = CoreSqlValue.insert_ig_it
            sql_dict = deal_sql_dict(sql_dict)
            list_json = json.loads(callmodel.sql_model.list_json)
            for page in range(page_index + 1, total_page):
                sql_dict["page"] = total_page
                sql_dict["page_index"] = page
                dic = {"page_info": f"{list_json['page_info']}_{page}"}
                sql_dict["list_json"] = json.dumps(dic, ensure_ascii=False)
                di_model_bef.lists.append(sql_dict.copy())
            result.befor_dicts.insert.append(di_model_bef)
        di_model_next = DealInsertModel()
        di_model_next.insert_pre = CoreSqlValue.insert_ig_it
        res = Selector(text=para_dicts["data"]["1_1"]['html'])
        li_list = res.xpath('//ul[@class="cm-news-list no-btop"]/li')
        for li in li_list:
            temp = info_dicts.copy()
            temp["task_tag"] = temp["task_tag_next"]
            del temp["task_tag_next"]
            article_json = dict()
            href = li.xpath('a/@href').extract_first()
            if not href:
                # Skip entries without a link; urljoin(None) would raise.
                continue
            base_url = f'http://www.shannan.gov.cn/{callmodel.sql_model.list_rawid}/index.html'
            url = parse.urljoin(base_url, href)
            if 'htm' not in url:
                continue
            rawid = re.findall(r'(.*?)\.', url.split('/')[-1])[0]
            temp["rawid"] = rawid
            temp["sub_db_id"] = '99617'
            article_json["url"] = url
            article_json["title"] = li.xpath('a/text()').extract_first().strip()
            article_json["pub_date"] = li.xpath('span/text()').extract_first().strip()
            temp["article_json"] = json.dumps(article_json, ensure_ascii=False)
            di_model_next.lists.append(temp)
        result.next_dicts.insert.append(di_model_next)

    return result


def policy_shannanarticle_callback(callmodel: CallBackModel[PolicyArticleModel]) -> DealModel:
    """Article-stage callback for Shannan; parsing is done in the ETL
    stage, so this returns an empty DealModel."""
    return DealModel()


def policy_shannanarticle_etl_callback(callmodel) -> EtlDealModel:
    """ETL callback for Shannan (山南市) policy article pages.

    Extracts metadata (title, document number, index number, subject,
    issuing organ) and the full text from the fetched article HTML, then
    emits rows for ``policy_latest`` / ``policy_fulltext_latest`` plus an
    ``other_dicts`` update holding any attachment info.

    :param callmodel: callback model carrying the fetched HTML and SQL row.
    :raises Exception: when the full-text container cannot be located.
    """
    result = EtlDealModel()
    save_data = list()

    html = callmodel.para_dicts['data']['1_1']['html']
    article_json = json.loads(callmodel.sql_model.article_json)
    provider_url = article_json['url']
    pub_date = clean_pubdate(article_json['pub_date'])
    pub_year = pub_date[:4]
    res = Selector(text=html)

    title = ''.join(res.xpath('//div[@class="gl-content"]//h1//text()').extract()).strip()
    if not title:
        # Fall back to the title captured on the list page.
        title = article_json['title'].strip()
    pub_no = ''.join(res.xpath('//ul[contains(@class,"article-info")]//span[contains(text(),"文号：")]/following::span[1]/text()').extract()).strip()
    index_no = ''.join(res.xpath('//ul[contains(@class,"article-info")]//span[contains(text(),"索引号")]/following::span[1]/text()').extract()).strip()
    # Fixed: the original expression started with '///ul[...]', which is
    # invalid XPath and makes lxml raise before the subject can be read.
    subject = ''.join(res.xpath('//ul[contains(@class,"article-info")]//span[contains(text(),"主题分类")]/following::span[1]/text()').extract()).strip()
    organ = ''.join(res.xpath('//ul[contains(@class,"article-info")]//span[contains(text(),"发布机构")]/following::span[1]/text()').extract()).strip()
    if organ.startswith('市'):
        # Organ appears as a bare "市..."; prefix the city name.
        organ = '山南' + organ

    fulltext_xpath = '//div[@class="article-content"]'
    fulltext = res.xpath(fulltext_xpath).extract_first()
    if not fulltext:
        raise Exception(f"fulltext not found for {provider_url}")

    down_date_str = time.strftime('%Y%m%d_%H%M%S', time.localtime())
    sub_db_id = '99617'
    rawid = callmodel.sql_model.rawid
    lngid = BaseLngid().GetLngid(sub_db_id, rawid)
    product = "SHANNAN"
    zt_provider = "shannangovpolicy"
    data = init_data(rawid, lngid, sub_db_id, down_date_str, product, zt_provider)
    print(lngid)  # debug trace kept; consider switching to logging

    data['title'] = title
    data['provider_url'] = provider_url
    data['pub_date'] = pub_date
    data['pub_year'] = pub_year
    data['pub_no'] = pub_no
    data['organ'] = organ
    data['index_no'] = index_no
    data['subject'] = subject

    save_data.append({'table': 'policy_latest', 'data': data})
    full_text_data = init_full_text_data(lngid, sub_db_id, down_date_str, fulltext, pub_year)
    save_data.append({'table': 'policy_fulltext_latest', 'data': full_text_data})
    result.save_data = save_data

    # Write any attachment info back onto the source row's other_dicts.
    file_info = get_file_info(data, res, f'({fulltext_xpath})')
    di_model_bef = DealUpdateModel()
    if file_info:
        di_model_bef.update.update({"other_dicts": json.dumps(file_info, ensure_ascii=False)})
    else:
        di_model_bef.update.update({"other_dicts": "{}"})
    di_model_bef.where.update({"rawid": callmodel.sql_model.rawid, "task_tag": callmodel.sql_model.task_tag,
                               "task_name": callmodel.sql_model.task_name})
    result.befor_dicts.update_list.append(di_model_bef)
    return result


def _naqu_list_rows(li_list, info_dicts, href_xpath, title_xpath, date_xpath=None):
    """Turn Naqu list-page nodes into article task rows (one dict per node).

    :param li_list: selected list-item nodes.
    :param info_dicts: base task dict; ``task_tag_next`` is promoted to
        ``task_tag`` for the article stage.
    :param href_xpath: relative XPath to the article link.
    :param title_xpath: relative XPath to the article title text.
    :param date_xpath: relative XPath to the publish date, or None when
        the layout has no date (stored as '').
    """
    base_url = 'http://www.naqu.gov.cn'
    rows = []
    for li in li_list:
        temp = info_dicts.copy()
        temp["task_tag"] = temp["task_tag_next"]
        del temp["task_tag_next"]
        href = li.xpath(href_xpath).extract_first()
        if not href:
            # Guard: 'http' in None would raise TypeError.
            continue
        # Some entries carry absolute (possibly off-site) URLs; keep those.
        url = href if 'http' in href else base_url + href
        if 'htm' not in url:
            continue
        rawid = re.findall(r'(.*?)\.', url.split('/')[-1])[0]
        temp["rawid"] = rawid
        temp["sub_db_id"] = '99618'
        article_json = {
            "url": url,
            "title": li.xpath(title_xpath).extract_first().strip(),
            "pub_date": li.xpath(date_xpath).extract_first().strip() if date_xpath else '',
        }
        temp["article_json"] = json.dumps(article_json, ensure_ascii=False)
        rows.append(temp)
    return rows


#   Naqu City, Tibet Autonomous Region (西藏自治区那曲市)
def policy_naqulist_callback(callmodel: CallBackModel[PolicyListModel]) -> DealModel:
    """List-page callback for the Naqu government policy site.

    Reads the total page count from ``createPageHTML('page_div',N``; on
    the first page (page_index == 1) it schedules the remaining list
    pages, then extracts article rows. Two list layouts exist: the
    "c100011" channel uses the ``lzyj_list`` markup (no date column),
    every other channel the ``pageList`` / ``gknr_list`` markup.
    """
    result = DealModel()
    para_dicts = callmodel.para_dicts
    task_info = callmodel.redis_all.parse_dict["1_1"].task_info
    info_dicts = {"task_name": callmodel.sql_model.task_name,
                  "task_tag": callmodel.sql_model.task_tag,
                  "task_tag_next": task_info.task_tag_next}
    if "1_1" in para_dicts["data"]:
        max_count = re.findall(r"createPageHTML\('page_div',(\d+)", para_dicts["data"]["1_1"]['html'])
        total_page = int(max_count[0]) if max_count else 1
        page_index = int(callmodel.sql_model.page_index)
        if page_index == 1:
            sql_dict = callmodel.sql_model.dict()
            di_model_bef = DealInsertModel()
            di_model_bef.insert_pre = CoreSqlValue.insert_ig_it
            sql_dict = deal_sql_dict(sql_dict)
            list_json = json.loads(callmodel.sql_model.list_json)
            for page in range(page_index + 1, total_page + 1):
                sql_dict["page"] = total_page
                sql_dict["page_index"] = page
                dic = {"page_info": f"{list_json['page_info']}_{page}"}
                sql_dict["list_json"] = json.dumps(dic, ensure_ascii=False)
                di_model_bef.lists.append(sql_dict.copy())
            result.befor_dicts.insert.append(di_model_bef)
        di_model_next = DealInsertModel()
        di_model_next.insert_pre = CoreSqlValue.insert_ig_it
        res = Selector(text=para_dicts["data"]["1_1"]['html'])
        if 'c100011' in callmodel.sql_model.list_rawid:
            di_model_next.lists.extend(_naqu_list_rows(
                res.xpath('//ul[@id="lzyj_list"]/li'), info_dicts,
                'div[@class="bt"]/p/a/@href', 'div[@class="bt"]/p/a/text()'))
        else:
            di_model_next.lists.extend(_naqu_list_rows(
                res.xpath('//div[@class="pageList"]/ul/li|//div[@class="gknr_list"]/dl/dd'),
                info_dicts, 'a/@href', 'a/text()', 'span/text()'))
        result.next_dicts.insert.append(di_model_next)

    return result


def policy_naquarticle_callback(callmodel: CallBackModel[PolicyArticleModel]) -> DealModel:
    """Article-stage callback for Naqu; parsing is done in the ETL
    stage, so this returns an empty DealModel."""
    return DealModel()


def policy_naquarticle_etl_callback(callmodel) -> EtlDealModel:
    """ETL callback for Naqu (那曲市) policy article pages.

    Extracts metadata (title, document number, index number, legal
    status, written date, issuing organ) and the full text from the
    fetched HTML, emitting ``policy_latest`` / ``policy_fulltext_latest``
    rows plus an ``other_dicts`` update with attachment info.

    :param callmodel: callback model carrying the fetched HTML and SQL row.
    :raises Exception: when the full-text container cannot be located.
    """
    result = EtlDealModel()
    save_data = list()

    html = callmodel.para_dicts['data']['1_1']['html']
    article_json = json.loads(callmodel.sql_model.article_json)
    provider_url = article_json['url']
    pub_date = clean_pubdate(article_json['pub_date'])
    pub_year = pub_date[:4]
    res = Selector(text=html)

    title = ''.join(res.xpath('//div[@class="gknbxq_top"]//h2//text()|//ucaptitle//text()').extract()).strip()
    if not title:
        # Fall back to the title captured on the list page.
        title = article_json['title'].strip()
    pub_no = ''.join(res.xpath('//div[@class="syhbox"]//font[contains(text(),"发文字号：")]/parent::li[1]/text()').extract()).strip()
    index_no = ''.join(res.xpath('//div[@class="syhbox"]//font[contains(text(),"索引号：")]/parent::li[1]/text()').extract()).strip()
    legal_status = ''.join(res.xpath('//div[@class="syhbox"]//font[contains(text(),"有")]/parent::li[1]/text()').extract()).strip()
    written_date = ''.join(res.xpath('//div[@class="syhbox"]//font[contains(text(),"生成日期")]/parent::li[1]/span/text()').extract()).strip()
    organ = ''.join(res.xpath('//div[@class="syhbox"]//font[contains(text(),"发布机构")]/parent::li[1]/text()').extract()).strip()
    if organ.startswith('市'):
        # Organ appears as a bare "市..."; prefix the city name.
        organ = '那曲' + organ

    fulltext_xpath = '//div[@class="gknbxq_detail"]|//div[@id="zoomcon"]'
    fulltext = res.xpath(fulltext_xpath).extract_first()
    if not fulltext:
        raise Exception(f"fulltext not found for {provider_url}")

    down_date_str = time.strftime('%Y%m%d_%H%M%S', time.localtime())
    sub_db_id = '99618'
    rawid = callmodel.sql_model.rawid
    lngid = BaseLngid().GetLngid(sub_db_id, rawid)
    product = "NAQU"
    zt_provider = "naqugovpolicy"
    data = init_data(rawid, lngid, sub_db_id, down_date_str, product, zt_provider)
    print(lngid)  # debug trace kept; consider switching to logging

    data['title'] = title
    data['provider_url'] = provider_url
    data['pub_date'] = pub_date
    data['pub_year'] = pub_year
    data['pub_no'] = pub_no
    data['organ'] = organ
    data['index_no'] = index_no
    data['written_date'] = clean_pubdate(written_date)
    data['legal_status'] = legal_status

    save_data.append({'table': 'policy_latest', 'data': data})
    full_text_data = init_full_text_data(lngid, sub_db_id, down_date_str, fulltext, pub_year)
    save_data.append({'table': 'policy_fulltext_latest', 'data': full_text_data})
    result.save_data = save_data

    # Write any attachment info back onto the source row's other_dicts.
    file_info = get_file_info(data, res, f'({fulltext_xpath})')
    di_model_bef = DealUpdateModel()
    if file_info:
        di_model_bef.update.update({"other_dicts": json.dumps(file_info, ensure_ascii=False)})
    else:
        di_model_bef.update.update({"other_dicts": "{}"})
    di_model_bef.where.update({"rawid": callmodel.sql_model.rawid, "task_tag": callmodel.sql_model.task_tag,
                               "task_name": callmodel.sql_model.task_name})
    result.befor_dicts.update_list.append(di_model_bef)
    return result


#   Ali (Ngari) Prefecture, Tibet Autonomous Region (西藏自治区阿里地区)
def policy_allist_callback(callmodel: CallBackModel[PolicyListModel]) -> DealModel:
    """List-page callback for the Ali prefecture government site.

    The last page number is taken from the "p_next p_fun" pager link.
    Page files are numbered in reverse (``/{total_page - page}.htm``), so
    on the first crawl (page_index == 0) the remaining pages are
    scheduled with descending file numbers; every call extracts article
    rows from the list table (skipping its header row).
    """
    result = DealModel()
    para_dicts = callmodel.para_dicts
    task_info = callmodel.redis_all.parse_dict["1_1"].task_info
    info_dicts = {"task_name": callmodel.sql_model.task_name,
                  "task_tag": callmodel.sql_model.task_tag,
                  "task_tag_next": task_info.task_tag_next}
    if "1_1" in para_dicts["data"]:
        max_count = re.findall(r"p_next p_fun.*?/(\d+)", para_dicts["data"]["1_1"]['html'])
        max_count = int(max_count[0]) if max_count else 1
        total_page = max_count + 1
        page_index = int(callmodel.sql_model.page_index)
        if page_index == 0:
            sql_dict = callmodel.sql_model.dict()
            di_model_bef = DealInsertModel()
            di_model_bef.insert_pre = CoreSqlValue.insert_ig_it
            sql_dict = deal_sql_dict(sql_dict)
            for page in range(page_index + 1, total_page):
                sql_dict["page"] = total_page
                sql_dict["page_index"] = page
                # Page files count down: logical page 1 is the highest file number.
                dic = {"page_info": f"/{total_page - page}.htm"}
                sql_dict["list_json"] = json.dumps(dic, ensure_ascii=False)
                di_model_bef.lists.append(sql_dict.copy())
            result.befor_dicts.insert.append(di_model_bef)
        di_model_next = DealInsertModel()
        di_model_next.insert_pre = CoreSqlValue.insert_ig_it
        res = Selector(text=para_dicts["data"]["1_1"]['html'])
        # First <tr> is the table header; skip it.
        li_list = res.xpath('//div[contains(@class,"govnewslist")]/tr')[1:]
        for li in li_list:
            temp = info_dicts.copy()
            temp["task_tag"] = temp["task_tag_next"]
            del temp["task_tag_next"]
            article_json = dict()
            href = li.xpath('td[1]/a/@href').extract_first()
            if not href:
                # Skip rows without a link; urljoin(None) would raise.
                continue
            if page_index == 0:
                base_url = f'https://www.al.gov.cn/{callmodel.sql_model.list_rawid}.htm'
            else:
                base_url = f'https://www.al.gov.cn/{callmodel.sql_model.list_rawid}/1.htm'
            url = parse.urljoin(base_url, href)
            if 'htm' not in url:
                continue
            rawid = re.findall(r'(.*?)\.', url.split('/')[-1])[0]
            temp["rawid"] = rawid
            temp["sub_db_id"] = '99619'
            article_json["url"] = url
            article_json["title"] = li.xpath('td[1]/a/text()').extract_first().strip()
            article_json["pub_date"] = li.xpath('td[3]/text()').extract_first().strip()
            temp["article_json"] = json.dumps(article_json, ensure_ascii=False)
            di_model_next.lists.append(temp)
        result.next_dicts.insert.append(di_model_next)

    return result


def policy_alarticle_callback(callmodel: CallBackModel[PolicyArticleModel]) -> DealModel:
    """Article-stage callback for Ali prefecture; parsing is done in the
    ETL stage, so this returns an empty DealModel."""
    return DealModel()


def policy_alarticle_etl_callback(callmodel) -> EtlDealModel:
    """ETL callback for Ali prefecture (阿里地区) policy article pages.

    Extracts metadata (title, document number, index number, issuing
    organ) and the full text, emitting ``policy_latest`` /
    ``policy_fulltext_latest`` rows plus an ``other_dicts`` update with
    attachment info.

    :param callmodel: callback model carrying the fetched HTML and SQL row.
    :raises Exception: when the full-text container cannot be located.
    """
    result = EtlDealModel()
    save_data = list()

    html = callmodel.para_dicts['data']['1_1']['html']
    article_json = json.loads(callmodel.sql_model.article_json)
    provider_url = article_json['url']
    pub_date = clean_pubdate(article_json['pub_date'])
    pub_year = pub_date[:4]
    res = Selector(text=html)

    title = ''.join(res.xpath('//div[@class="contit"]//h3//text()').extract()).strip()
    if not title:
        # Fall back to the title captured on the list page.
        title = article_json['title'].strip()
    pub_no = ''.join(res.xpath('//div[@class="contit"]//span[contains(text(),"发文字号：")]/parent::li[1]/text()').extract()).strip()
    index_no = ''.join(res.xpath('//div[@class="contit"]//span[contains(text(),"索引号：")]/parent::li[1]/text()').extract()).strip()
    organ = ''.join(res.xpath('//div[@class="contit"]//span[contains(text(),"发文机构：")]/parent::li[1]/text()').extract()).strip()
    if organ.startswith('地区'):
        # Organ appears as a bare "地区..."; prefix the prefecture name.
        organ = '阿里' + organ

    fulltext_xpath = '//div[contains(@id,"vsb_content")]'
    fulltext = res.xpath(fulltext_xpath).extract_first()
    if not fulltext:
        raise Exception(f"fulltext not found for {provider_url}")

    down_date_str = time.strftime('%Y%m%d_%H%M%S', time.localtime())
    sub_db_id = '99619'
    rawid = callmodel.sql_model.rawid
    lngid = BaseLngid().GetLngid(sub_db_id, rawid)
    product = "AL"
    zt_provider = "algovpolicy"
    data = init_data(rawid, lngid, sub_db_id, down_date_str, product, zt_provider)
    print(lngid)  # debug trace kept; consider switching to logging

    data['title'] = title
    data['provider_url'] = provider_url
    data['pub_date'] = pub_date
    data['pub_year'] = pub_year
    data['pub_no'] = pub_no
    data['organ'] = organ
    data['index_no'] = index_no

    save_data.append({'table': 'policy_latest', 'data': data})
    full_text_data = init_full_text_data(lngid, sub_db_id, down_date_str, fulltext, pub_year)
    save_data.append({'table': 'policy_fulltext_latest', 'data': full_text_data})
    result.save_data = save_data

    # Write any attachment info back onto the source row's other_dicts.
    file_info = get_file_info(data, res, f'({fulltext_xpath})')
    di_model_bef = DealUpdateModel()
    if file_info:
        di_model_bef.update.update({"other_dicts": json.dumps(file_info, ensure_ascii=False)})
    else:
        di_model_bef.update.update({"other_dicts": "{}"})
    di_model_bef.where.update({"rawid": callmodel.sql_model.rawid, "task_tag": callmodel.sql_model.task_tag,
                               "task_name": callmodel.sql_model.task_name})
    result.befor_dicts.update_list.append(di_model_bef)
    return result


#   Shaanxi Provincial Development and Reform Commission (陕西省发展和改革委员会)
def policy_sndrcshaanxilist_callback(callmodel: CallBackModel[PolicyListModel]) -> DealModel:
    """List-page callback for the Shaanxi DRC policy site.

    The total page count comes from the pager's ``total="N"`` attribute.
    On the first page (page_index == 1) the remaining pages are scheduled
    reusing the original list_json (only page_index varies), then article
    rows are extracted from either of the two known list layouts.
    """
    result = DealModel()
    para_dicts = callmodel.para_dicts
    task_info = callmodel.redis_all.parse_dict["1_1"].task_info
    info_dicts = {"task_name": callmodel.sql_model.task_name,
                  "task_tag": callmodel.sql_model.task_tag,
                  "task_tag_next": task_info.task_tag_next}
    if "1_1" in para_dicts["data"]:
        max_count = re.findall(r'total="(\d+)', para_dicts["data"]["1_1"]['html'])
        total_page = int(max_count[0]) if max_count else 1
        page_index = int(callmodel.sql_model.page_index)
        if page_index == 1:
            sql_dict = callmodel.sql_model.dict()
            di_model_bef = DealInsertModel()
            di_model_bef.insert_pre = CoreSqlValue.insert_ig_it
            sql_dict = deal_sql_dict(sql_dict)
            for page in range(page_index + 1, total_page + 1):
                sql_dict["page"] = total_page
                sql_dict["page_index"] = page
                # Every page posts the same list_json; only page_index differs.
                sql_dict["list_json"] = callmodel.sql_model.list_json
                di_model_bef.lists.append(sql_dict.copy())
            result.befor_dicts.insert.append(di_model_bef)
        di_model_next = DealInsertModel()
        di_model_next.insert_pre = CoreSqlValue.insert_ig_it
        res = Selector(text=para_dicts["data"]["1_1"]['html'])
        li_list = res.xpath('//div[@class="sj"]/ul/li|//div[@class="m-gknr-list"]/ul/li')
        for li in li_list:
            temp = info_dicts.copy()
            temp["task_tag"] = temp["task_tag_next"]
            del temp["task_tag_next"]
            article_json = dict()
            href = li.xpath('a/@href').extract_first()
            if not href:
                # Guard: 'http' in None would raise TypeError.
                continue
            base_url = 'http://sndrc.shaanxi.gov.cn'
            url = href if 'http' in href else base_url + href
            if 'htm' not in url:
                continue
            rawid = re.findall(r'(.*?)\.', url.split('/')[-1])[0]
            temp["rawid"] = rawid
            temp["sub_db_id"] = '99620'
            article_json["url"] = url
            article_json["title"] = li.xpath('a/text()').extract_first().strip()
            article_json["pub_date"] = li.xpath('span/text()').extract_first().strip()
            temp["article_json"] = json.dumps(article_json, ensure_ascii=False)
            di_model_next.lists.append(temp)
        result.next_dicts.insert.append(di_model_next)

    return result


def policy_sndrcshaanxiarticle_callback(callmodel: CallBackModel[PolicyArticleModel]) -> DealModel:
    """Article-stage callback for Shaanxi DRC; parsing is done in the
    ETL stage, so this returns an empty DealModel."""
    return DealModel()


def policy_sndrcshaanxiarticle_etl_callback(callmodel) -> EtlDealModel:
    """ETL callback for Shaanxi DRC policy article pages.

    Extracts metadata (title, document number, index number, legal
    status, subject, issuing organ) and the full text, emitting
    ``policy_latest`` / ``policy_fulltext_latest`` rows plus an
    ``other_dicts`` update with attachment info.

    :param callmodel: callback model carrying the fetched HTML and SQL row.
    :raises Exception: when the full-text container cannot be located.
    """
    result = EtlDealModel()
    save_data = list()

    html = callmodel.para_dicts['data']['1_1']['html']
    article_json = json.loads(callmodel.sql_model.article_json)
    provider_url = article_json['url']
    pub_date = clean_pubdate(article_json['pub_date'])
    pub_year = pub_date[:4]
    res = Selector(text=html)

    title = ''.join(res.xpath('//div[@class="con"]//h2//text()|//div[@class="m-gk-title"]//text()').extract()).strip()
    if not title:
        # Fall back to the title captured on the list page.
        title = article_json['title'].strip()
    pub_no = ''.join(res.xpath('//div[@class="fgwjbt"]//span[contains(text(),"发文字号：")]/parent::div[1]/text()').extract()).strip()
    index_no = ''.join(res.xpath('//div[@class="fgwjbt"]//span[contains(text(),"索")]/parent::div[1]/text()').extract()).strip()
    legal_status = ''.join(res.xpath('//div[@class="fgwjbt"]//span[contains(text(),"公文时效")]/parent::div[1]/text()').extract()).strip()
    subject = ''.join(res.xpath('//div[@class="fgwjbt"]//span[contains(text(),"主题分类")]/parent::div[1]/text()').extract()).strip()
    organ = ''.join(res.xpath('//div[@class="fgwjbt"]//span[contains(text(),"发布机构")]/parent::div[1]/text()').extract()).strip()
    if organ.startswith('省'):
        # Organ appears as a bare "省..."; prefix the province name.
        organ = '陕西' + organ

    fulltext_xpath = '//div[@class="info"]|//div[@class="scroll_cont"]'
    fulltext = res.xpath(fulltext_xpath).extract_first()
    if not fulltext:
        raise Exception(f"fulltext not found for {provider_url}")

    down_date_str = time.strftime('%Y%m%d_%H%M%S', time.localtime())
    sub_db_id = '99620'
    rawid = callmodel.sql_model.rawid
    lngid = BaseLngid().GetLngid(sub_db_id, rawid)
    product = "SNDRCSHAANXI"
    zt_provider = "sndrcshaanxigovpolicy"
    data = init_data(rawid, lngid, sub_db_id, down_date_str, product, zt_provider)
    print(lngid)  # debug trace kept; consider switching to logging

    data['title'] = title
    data['provider_url'] = provider_url
    data['pub_date'] = pub_date
    data['pub_year'] = pub_year
    data['pub_no'] = pub_no
    data['organ'] = organ
    data['index_no'] = index_no
    data['subject'] = subject
    data['legal_status'] = legal_status

    save_data.append({'table': 'policy_latest', 'data': data})
    full_text_data = init_full_text_data(lngid, sub_db_id, down_date_str, fulltext, pub_year)
    save_data.append({'table': 'policy_fulltext_latest', 'data': full_text_data})
    result.save_data = save_data

    # Write any attachment info back onto the source row's other_dicts.
    file_info = get_file_info(data, res, f'({fulltext_xpath})')
    di_model_bef = DealUpdateModel()
    if file_info:
        di_model_bef.update.update({"other_dicts": json.dumps(file_info, ensure_ascii=False)})
    else:
        di_model_bef.update.update({"other_dicts": "{}"})
    di_model_bef.where.update({"rawid": callmodel.sql_model.rawid, "task_tag": callmodel.sql_model.task_tag,
                               "task_name": callmodel.sql_model.task_name})
    result.befor_dicts.update_list.append(di_model_bef)
    return result


#   Shaanxi Department of Industry and Information Technology (陕西省工业和信息化厅)
def policy_gxtshaanxilist_callback(callmodel: CallBackModel[PolicyListModel]) -> DealModel:
    """List-page callback for the Shaanxi Industry & IT department site.

    Page count is inferred from the ``index_N`` pager links; on the first
    page (page_index == 1) the remaining list pages are scheduled with
    ``{page_info}_{page}`` suffixes, then article rows are extracted.
    """
    result = DealModel()
    para_dicts = callmodel.para_dicts
    task_info = callmodel.redis_all.parse_dict["1_1"].task_info
    info_dicts = {"task_name": callmodel.sql_model.task_name,
                  "task_tag": callmodel.sql_model.task_tag,
                  "task_tag_next": task_info.task_tag_next}
    if "1_1" in para_dicts["data"]:
        max_count = re.findall(r'index_(\d+)', para_dicts["data"]["1_1"]['html'])
        # The pager repeats "index_N" links; the second-to-last match is
        # taken as the max page number (presumably the "last page" link
        # appears before a trailing one — TODO confirm against live markup).
        max_count = int(max_count[-2]) if len(max_count) > 1 else 1
        total_page = max_count
        page_index = int(callmodel.sql_model.page_index)
        if page_index == 1:
            sql_dict = callmodel.sql_model.dict()
            di_model_bef = DealInsertModel()
            di_model_bef.insert_pre = CoreSqlValue.insert_ig_it
            sql_dict = deal_sql_dict(sql_dict)
            list_json = json.loads(callmodel.sql_model.list_json)
            for page in range(page_index + 1, total_page + 1):
                sql_dict["page"] = total_page
                sql_dict["page_index"] = page
                dic = {"page_info": f"{list_json['page_info']}_{page}"}
                sql_dict["list_json"] = json.dumps(dic, ensure_ascii=False)
                di_model_bef.lists.append(sql_dict.copy())
            result.befor_dicts.insert.append(di_model_bef)
        di_model_next = DealInsertModel()
        di_model_next.insert_pre = CoreSqlValue.insert_ig_it
        res = Selector(text=para_dicts["data"]["1_1"]['html'])
        li_list = res.xpath('//div[@class="c-zfxxgk-content-main-right-list"]/div')
        for li in li_list:
            temp = info_dicts.copy()
            temp["task_tag"] = temp["task_tag_next"]
            del temp["task_tag_next"]
            article_json = dict()
            href = li.xpath('a/@href').extract_first()
            if not href:
                # Guard: 'http' in None would raise TypeError.
                continue
            base_url = 'http://gxt.shaanxi.gov.cn'
            url = href if 'http' in href else base_url + href
            if 'htm' not in url:
                continue
            rawid = re.findall(r'(.*?)\.', url.split('/')[-1])[0]
            temp["rawid"] = rawid
            temp["sub_db_id"] = '99621'
            article_json["url"] = url
            article_json["title"] = li.xpath('a/text()').extract_first().strip()
            article_json["pub_date"] = li.xpath('div/text()').extract_first().strip()
            temp["article_json"] = json.dumps(article_json, ensure_ascii=False)
            di_model_next.lists.append(temp)
        result.next_dicts.insert.append(di_model_next)

    return result


def policy_gxtshaanxiarticle_callback(callmodel: CallBackModel[PolicyArticleModel]) -> DealModel:
    """Article-stage callback for Shaanxi Industry & IT; parsing is done
    in the ETL stage, so this returns an empty DealModel."""
    return DealModel()


def policy_gxtshaanxiarticle_etl_callback(callmodel) -> EtlDealModel:
    """ETL callback for Shaanxi Industry & IT department article pages.

    Only the title is extracted from the page (this site exposes no
    structured metadata block); together with the list-time URL/date it
    emits ``policy_latest`` / ``policy_fulltext_latest`` rows plus an
    ``other_dicts`` update with attachment info.

    :param callmodel: callback model carrying the fetched HTML and SQL row.
    :raises Exception: when the full-text container cannot be located.
    """
    result = EtlDealModel()
    save_data = list()

    html = callmodel.para_dicts['data']['1_1']['html']
    article_json = json.loads(callmodel.sql_model.article_json)
    provider_url = article_json['url']
    pub_date = clean_pubdate(article_json['pub_date'])
    pub_year = pub_date[:4]
    res = Selector(text=html)

    title = ''.join(res.xpath('//div[@class="newsBody"]//h3//text()').extract()).strip()
    if not title:
        # Fall back to the title captured on the list page.
        title = article_json['title'].strip()

    fulltext_xpath = '//div[@id="content"]|//div[@class="scroll_cont"]'
    fulltext = res.xpath(fulltext_xpath).extract_first()
    if not fulltext:
        raise Exception(f"fulltext not found for {provider_url}")

    down_date_str = time.strftime('%Y%m%d_%H%M%S', time.localtime())
    sub_db_id = '99621'
    rawid = callmodel.sql_model.rawid
    lngid = BaseLngid().GetLngid(sub_db_id, rawid)
    product = "GXTSHAANXI"
    zt_provider = "gxtshaanxigovpolicy"
    data = init_data(rawid, lngid, sub_db_id, down_date_str, product, zt_provider)
    print(lngid)  # debug trace kept; consider switching to logging

    data['title'] = title
    data['provider_url'] = provider_url
    data['pub_date'] = pub_date
    data['pub_year'] = pub_year

    save_data.append({'table': 'policy_latest', 'data': data})
    full_text_data = init_full_text_data(lngid, sub_db_id, down_date_str, fulltext, pub_year)
    save_data.append({'table': 'policy_fulltext_latest', 'data': full_text_data})
    result.save_data = save_data

    # Write any attachment info back onto the source row's other_dicts.
    file_info = get_file_info(data, res, f'({fulltext_xpath})')
    di_model_bef = DealUpdateModel()
    if file_info:
        di_model_bef.update.update({"other_dicts": json.dumps(file_info, ensure_ascii=False)})
    else:
        di_model_bef.update.update({"other_dicts": "{}"})
    di_model_bef.where.update({"rawid": callmodel.sql_model.rawid, "task_tag": callmodel.sql_model.task_tag,
                               "task_name": callmodel.sql_model.task_name})
    result.befor_dicts.update_list.append(di_model_bef)
    return result


#   陕西省科学技术厅 (Shaanxi Provincial Dept. of Science & Technology)
def policy_kjtshaanxilist_callback(callmodel: CallBackModel[PolicyListModel]) -> DealModel:
    """List-page callback for kjt.shaanxi.gov.cn (sub_db_id 99622).

    On page 1, reads the total page count ("共N页") and queues the remaining
    list pages. On every page, extracts each article's url/title/pub_date and
    queues it for the article stage under the next task tag.
    """
    result = DealModel()
    para_dicts = callmodel.para_dicts
    task_info = callmodel.redis_all.parse_dict["1_1"].task_info
    info_dicts = {"task_name": callmodel.sql_model.task_name,
                  "task_tag": callmodel.sql_model.task_tag,
                  "task_tag_next": task_info.task_tag_next}
    if "1_1" in para_dicts["data"]:
        # raw string avoids the invalid-escape-sequence warning for \d
        max_count = re.findall(r'>共(\d+)页<', para_dicts["data"]["1_1"]['html'])
        total_page = int(max_count[0]) if max_count else 1
        page_index = int(callmodel.sql_model.page_index)
        if page_index == 1:
            # Only the first page fans out the remaining list pages.
            sql_dict = callmodel.sql_model.dict()
            di_model_bef = DealInsertModel()
            di_model_bef.insert_pre = CoreSqlValue.insert_ig_it
            sql_dict = deal_sql_dict(sql_dict)
            for page in range(page_index + 1, total_page + 1):
                sql_dict["page"] = total_page
                sql_dict["page_index"] = page
                sql_dict["list_json"] = callmodel.sql_model.list_json
                di_model_bef.lists.append(sql_dict.copy())
            result.befor_dicts.insert.append(di_model_bef)
        di_model_next = DealInsertModel()
        di_model_next.insert_pre = CoreSqlValue.insert_ig_it
        res = Selector(text=para_dicts["data"]["1_1"]['html'])
        base_url = 'https://kjt.shaanxi.gov.cn'
        if 'keywords' in callmodel.sql_model.list_rawid:
            # Search-result layout: table rows; the first row is the header.
            li_list = res.xpath('//table[@class="list_table"]/tbody/tr')[1:]
            for li in li_list:
                temp = info_dicts.copy()
                temp["task_tag"] = temp["task_tag_next"]
                del temp["task_tag_next"]
                article_json = dict()
                href = li.xpath('td[@class="bt"]/a/@href').extract_first()
                if not href:
                    continue
                url = href if 'http' in href else base_url + href
                if 'htm' not in url:
                    continue
                # rawid = file name without extension
                rawid = re.findall(r'(.*?)\.', url.split('/')[-1])[0]
                temp["rawid"] = rawid
                temp["sub_db_id"] = '99622'
                article_json["url"] = url
                article_json["title"] = li.xpath('td[@class="bt"]/a/text()').extract_first().strip()
                article_json["pub_date"] = li.xpath('td[@class="sj"]/text()').extract_first().strip()
                temp["article_json"] = json.dumps(article_json, ensure_ascii=False)
                di_model_next.lists.append(temp)
        else:
            # Normal column layout.
            li_list = res.xpath('//div[@class="pList"]/ul/li|//ul[@class="textlist ifo_bdr"]/li')
            for li in li_list:
                temp = info_dicts.copy()
                temp["task_tag"] = temp["task_tag_next"]
                del temp["task_tag_next"]
                article_json = dict()
                href = li.xpath('a/@href').extract_first()
                if not href:
                    # Fix: original crashed with TypeError on items without a link
                    # (the other branch already had this guard).
                    continue
                url = href if 'http' in href else base_url + href
                if 'htm' not in url:
                    continue
                rawid = re.findall(r'(.*?)\.', url.split('/')[-1])[0]
                temp["rawid"] = rawid
                temp["sub_db_id"] = '99622'
                article_json["url"] = url
                article_json["title"] = li.xpath('a/@title').extract_first().strip()
                article_json["pub_date"] = li.xpath('span/text()|h3/text()').extract_first().strip()
                temp["article_json"] = json.dumps(article_json, ensure_ascii=False)
                di_model_next.lists.append(temp)
        result.next_dicts.insert.append(di_model_next)

    return result


def policy_kjtshaanxiarticle_callback(callmodel: CallBackModel[PolicyArticleModel]) -> DealModel:
    """No-op article callback: returns an empty DealModel."""
    return DealModel()


def policy_kjtshaanxiarticle_etl_callback(callmodel) -> EtlDealModel:
    """ETL callback for kjt.shaanxi.gov.cn articles (sub_db_id 99622).

    Parses the crawled article HTML into a metadata row (table policy_latest)
    and a full-text row (table policy_fulltext_latest), and writes attachment
    info (if any) back onto the task row as `other_dicts`.

    Raises:
        Exception: when the full-text container cannot be located.
    """
    result = EtlDealModel()
    save_data = list()

    html = callmodel.para_dicts['data']['1_1']['html']
    article_json = json.loads(callmodel.sql_model.article_json)
    provider_url = article_json['url']
    pub_date = clean_pubdate(article_json['pub_date'])
    pub_year = pub_date[:4]
    res = Selector(text=html)

    # Prefer the on-page title; fall back to the list-page title.
    title = ''.join(res.xpath('//div[@class="info_title"]//text()').extract()).strip()
    if not title:
        title = article_json['title'].strip()
    pub_no = ''.join(res.xpath('//div[@class="info_head"]//td[contains(text(),"发文字号")]/following::td[1]/text()').extract()).strip()
    index_no = ''.join(res.xpath('//div[@class="info_head"]//td[contains(text(),"索引号")]/following::td[1]/text()').extract()).strip()
    legal_status = ''.join(res.xpath('//div[@class="info_head"]//td[contains(text(),"公文时效")]/following::td[1]/text()').extract()).strip()
    written_date = ''.join(res.xpath('//div[@class="info_head"]//td[contains(text(),"成文日期")]/following::td[1]/text()').extract()).strip()
    organ = ''.join(res.xpath('//div[@class="info_head"]//td[contains(text(),"发文单位")]/following::td[1]/text()').extract()).strip()
    # Sites often abbreviate "省..." — prefix the province name.
    if organ.startswith('省'):
        organ = '陕西' + organ

    fulltext_xpath = '//div[@class="info_content"]'
    fulltext = res.xpath(fulltext_xpath).extract_first()
    if not fulltext:
        # Fix: give the exception a message so failures are diagnosable in logs.
        raise Exception(f'fulltext not found: {provider_url}')

    down_date_str = time.strftime('%Y%m%d_%H%M%S', time.localtime())
    sub_db_id = '99622'
    rawid = callmodel.sql_model.rawid
    lngid = BaseLngid().GetLngid(sub_db_id, rawid)
    product = "KJTSHAANXI"
    zt_provider = "kjtshaanxigovpolicy"
    data = init_data(rawid, lngid, sub_db_id, down_date_str, product, zt_provider)
    print(lngid)  # debug trace

    data['title'] = title
    data['provider_url'] = provider_url
    data['pub_date'] = pub_date
    data['pub_year'] = pub_year
    data['pub_no'] = pub_no
    data['organ'] = organ
    data['index_no'] = index_no
    data['written_date'] = clean_pubdate(written_date)
    data['legal_status'] = legal_status

    save_data.append({'table': 'policy_latest', 'data': data})
    full_text_data = init_full_text_data(lngid, sub_db_id, down_date_str, fulltext, pub_year)
    save_data.append({'table': 'policy_fulltext_latest', 'data': full_text_data})
    result.save_data = save_data

    # Persist attachment info back to the task row ("{}" when none found).
    file_info = get_file_info(data, res, f'({fulltext_xpath})')
    di_model_bef = DealUpdateModel()
    if file_info:
        di_model_bef.update.update({"other_dicts": json.dumps(file_info, ensure_ascii=False)})
    else:
        di_model_bef.update.update({"other_dicts": "{}"})
    di_model_bef.where.update({"rawid": callmodel.sql_model.rawid, "task_tag": callmodel.sql_model.task_tag,
                               "task_name": callmodel.sql_model.task_name})
    result.befor_dicts.update_list.append(di_model_bef)
    return result


#   陕西省教育厅 (Shaanxi Provincial Dept. of Education)
def policy_jytshaanxilist_callback(callmodel: CallBackModel[PolicyListModel]) -> DealModel:
    """List-page callback for jyt.shaanxi.gov.cn (sub_db_id 99623).

    On page 1, reads the total page count ("条/N页") and queues the remaining
    list pages. Items appear as groups of three <li> elements; the first holds
    the link/title and the second holds the publish date.
    """
    result = DealModel()
    para_dicts = callmodel.para_dicts
    task_info = callmodel.redis_all.parse_dict["1_1"].task_info
    info_dicts = {"task_name": callmodel.sql_model.task_name,
                  "task_tag": callmodel.sql_model.task_tag,
                  "task_tag_next": task_info.task_tag_next}
    if "1_1" in para_dicts["data"]:
        # raw string avoids the invalid-escape-sequence warning for \d
        max_count = re.findall(r'条/(\d+)页<', para_dicts["data"]["1_1"]['html'])
        total_page = int(max_count[0]) if max_count else 1
        page_index = int(callmodel.sql_model.page_index)
        if page_index == 1:
            # Only the first page fans out the remaining list pages.
            sql_dict = callmodel.sql_model.dict()
            di_model_bef = DealInsertModel()
            di_model_bef.insert_pre = CoreSqlValue.insert_ig_it
            sql_dict = deal_sql_dict(sql_dict)
            for page in range(page_index + 1, total_page + 1):
                sql_dict["page"] = total_page
                sql_dict["page_index"] = page
                sql_dict["list_json"] = callmodel.sql_model.list_json
                di_model_bef.lists.append(sql_dict.copy())
            result.befor_dicts.insert.append(di_model_bef)
        di_model_next = DealInsertModel()
        di_model_next.insert_pre = CoreSqlValue.insert_ig_it
        res = Selector(text=para_dicts["data"]["1_1"]['html'])
        li_list = res.xpath('//li[@class="fdzdgknr-lb"]/parent::ul[1]/li')
        base_url = 'http://jyt.shaanxi.gov.cn'
        for index in range(0, len(li_list), 3):
            if index + 1 >= len(li_list):
                # Fix: original indexed li_list[index + 1] unconditionally and
                # raised IndexError when the item count was not a multiple of 3.
                break
            temp = info_dicts.copy()
            temp["task_tag"] = temp["task_tag_next"]
            del temp["task_tag_next"]
            article_json = dict()
            href = li_list[index].xpath('a/@href').extract_first()
            if not href:
                # Fix: original crashed with TypeError on items without a link.
                continue
            url = href if 'http' in href else base_url + href
            if 'htm' not in url:
                continue
            # rawid = file name without extension
            rawid = re.findall(r'(.*?)\.', url.split('/')[-1])[0]
            temp["rawid"] = rawid
            temp["sub_db_id"] = '99623'
            article_json["url"] = url
            article_json["title"] = li_list[index].xpath('a/text()').extract_first().strip()
            article_json["pub_date"] = li_list[index + 1].xpath('span[1]/text()').extract_first().strip()
            temp["article_json"] = json.dumps(article_json, ensure_ascii=False)
            di_model_next.lists.append(temp)
        result.next_dicts.insert.append(di_model_next)

    return result


def policy_jytshaanxiarticle_callback(callmodel: CallBackModel[PolicyArticleModel]) -> DealModel:
    """No-op article callback: returns an empty DealModel."""
    return DealModel()


def policy_jytshaanxiarticle_etl_callback(callmodel) -> EtlDealModel:
    """ETL callback for jyt.shaanxi.gov.cn articles (sub_db_id 99623).

    Parses the crawled article HTML into a metadata row (table policy_latest)
    and a full-text row (table policy_fulltext_latest), and writes attachment
    info (if any) back onto the task row as `other_dicts`.

    Raises:
        Exception: when the full-text container cannot be located.
    """
    result = EtlDealModel()
    save_data = list()

    html = callmodel.para_dicts['data']['1_1']['html']
    article_json = json.loads(callmodel.sql_model.article_json)
    provider_url = article_json['url']
    pub_date = clean_pubdate(article_json['pub_date'])
    pub_year = pub_date[:4]
    res = Selector(text=html)

    # Prefer the on-page title; fall back to the list-page title.
    title = ''.join(res.xpath('//h1[@class="title"]//text()').extract()).strip()
    if not title:
        title = article_json['title'].strip()
    pub_no = ''.join(res.xpath('//div[@class="suoyin"]//td[contains(text(),"发文字号")]/following::td[1]/text()').extract()).strip()
    index_no = ''.join(res.xpath('//div[@class="suoyin"]//b[contains(text(),"索引号")]/parent::li[1]/text()').extract()).strip()
    legal_status = ''.join(res.xpath('//div[@class="suoyin"]//b[contains(text(),"公文时效：")]/parent::li[1]/text()').extract()).strip()
    subject = ''.join(res.xpath('//div[@class="suoyin"]//b[contains(text(),"别：")]/parent::li[1]/text()').extract()).strip()
    written_date = ''.join(res.xpath('//div[@class="suoyin"]//b[contains(text(),"成文日期：")]/parent::li[1]/text()').extract()).strip()
    organ = ''.join(res.xpath('//div[@class="suoyin"]//b[contains(text(),"发布机构：")]/parent::li[1]/text()').extract()).strip()
    # Sites often abbreviate "省..." — prefix the province name.
    if organ.startswith('省'):
        organ = '陕西' + organ

    fulltext_xpath = '//div[@id="article"]'
    fulltext = res.xpath(fulltext_xpath).extract_first()
    if not fulltext:
        # Fix: give the exception a message so failures are diagnosable in logs.
        raise Exception(f'fulltext not found: {provider_url}')

    down_date_str = time.strftime('%Y%m%d_%H%M%S', time.localtime())
    sub_db_id = '99623'
    rawid = callmodel.sql_model.rawid
    lngid = BaseLngid().GetLngid(sub_db_id, rawid)
    product = "JYTSHAANXI"
    zt_provider = "jytshaanxigovpolicy"
    data = init_data(rawid, lngid, sub_db_id, down_date_str, product, zt_provider)
    print(lngid)  # debug trace

    data['title'] = title
    data['provider_url'] = provider_url
    data['pub_date'] = pub_date
    data['pub_year'] = pub_year
    data['pub_no'] = pub_no
    data['organ'] = organ
    data['index_no'] = index_no
    data['written_date'] = clean_pubdate(written_date)
    data['subject'] = subject
    data['legal_status'] = legal_status

    save_data.append({'table': 'policy_latest', 'data': data})
    full_text_data = init_full_text_data(lngid, sub_db_id, down_date_str, fulltext, pub_year)
    save_data.append({'table': 'policy_fulltext_latest', 'data': full_text_data})
    result.save_data = save_data

    # Persist attachment info back to the task row ("{}" when none found).
    file_info = get_file_info(data, res, f'({fulltext_xpath})')
    di_model_bef = DealUpdateModel()
    if file_info:
        di_model_bef.update.update({"other_dicts": json.dumps(file_info, ensure_ascii=False)})
    else:
        di_model_bef.update.update({"other_dicts": "{}"})
    di_model_bef.where.update({"rawid": callmodel.sql_model.rawid, "task_tag": callmodel.sql_model.task_tag,
                               "task_name": callmodel.sql_model.task_name})
    result.befor_dicts.update_list.append(di_model_bef)
    return result


#   陕西省民政厅 (Shaanxi Provincial Dept. of Civil Affairs)
def policy_mztshaanxilist_callback(callmodel: CallBackModel[PolicyListModel]) -> DealModel:
    """List-page callback for mzt.shaanxi.gov.cn (sub_db_id 99624).

    On page 1, reads the total page count ("，共N") and queues the remaining
    list pages; on every page, extracts article url/title/pub_date and queues
    them for the article stage.
    """
    result = DealModel()
    para_dicts = callmodel.para_dicts
    task_info = callmodel.redis_all.parse_dict["1_1"].task_info
    info_dicts = {"task_name": callmodel.sql_model.task_name,
                  "task_tag": callmodel.sql_model.task_tag,
                  "task_tag_next": task_info.task_tag_next}
    if "1_1" in para_dicts["data"]:
        # raw string avoids the invalid-escape-sequence warning for \d
        max_count = re.findall(r'，共<.*?>(\d+)', para_dicts["data"]["1_1"]['html'])
        total_page = int(max_count[0]) if max_count else 1
        page_index = int(callmodel.sql_model.page_index)
        if page_index == 1:
            # Only the first page fans out the remaining list pages.
            sql_dict = callmodel.sql_model.dict()
            di_model_bef = DealInsertModel()
            di_model_bef.insert_pre = CoreSqlValue.insert_ig_it
            sql_dict = deal_sql_dict(sql_dict)
            for page in range(page_index + 1, total_page + 1):
                sql_dict["page"] = total_page
                sql_dict["page_index"] = page
                sql_dict["list_json"] = callmodel.sql_model.list_json
                di_model_bef.lists.append(sql_dict.copy())
            result.befor_dicts.insert.append(di_model_bef)
        di_model_next = DealInsertModel()
        di_model_next.insert_pre = CoreSqlValue.insert_ig_it
        res = Selector(text=para_dicts["data"]["1_1"]['html'])
        li_list = res.xpath('//div[@class="right_box"]//div[@class="list"]/ul/li|//div[@class="zfxxgk_zdgkc"]/ul/li')
        base_url = 'http://mzt.shaanxi.gov.cn'
        for li in li_list:
            temp = info_dicts.copy()
            temp["task_tag"] = temp["task_tag_next"]
            del temp["task_tag_next"]
            article_json = dict()
            href = li.xpath('a/@href').extract_first()
            if not href:
                # Fix: original crashed with TypeError on items without a link.
                continue
            url = href if 'http' in href else base_url + href
            if 'contentId=' in url:
                # Fix: original regex 'contentId=(.*?)&' required a trailing '&'
                # and raised IndexError when contentId was the last parameter.
                rawid = re.findall(r'contentId=([^&]*)', url)[0]
            else:
                # rawid = file name without extension
                rawid = re.findall(r'(.*?)\.', url.split('/')[-1])[0]
            temp["rawid"] = rawid
            temp["sub_db_id"] = '99624'
            article_json["url"] = url
            article_json["title"] = li.xpath('a/@title').extract_first().strip()
            article_json["pub_date"] = li.xpath('span/text()|b/text()').extract_first().strip()
            temp["article_json"] = json.dumps(article_json, ensure_ascii=False)
            di_model_next.lists.append(temp)
        result.next_dicts.insert.append(di_model_next)

    return result


def policy_mztshaanxiarticle_callback(callmodel: CallBackModel[PolicyArticleModel]) -> DealModel:
    """No-op article callback: returns an empty DealModel."""
    return DealModel()


def policy_mztshaanxiarticle_etl_callback(callmodel) -> EtlDealModel:
    """ETL callback for mzt.shaanxi.gov.cn articles (sub_db_id 99624).

    Parses the crawled article HTML into a metadata row (table policy_latest)
    and a full-text row (table policy_fulltext_latest), and writes attachment
    info (if any) back onto the task row as `other_dicts`. The site uses the
    literal "无" (none) as a placeholder; those values are normalized to "".

    Raises:
        Exception: when the full-text container cannot be located.
    """
    result = EtlDealModel()
    save_data = list()

    html = callmodel.para_dicts['data']['1_1']['html']
    article_json = json.loads(callmodel.sql_model.article_json)
    provider_url = article_json['url']
    pub_date = clean_pubdate(article_json['pub_date'])
    pub_year = pub_date[:4]
    res = Selector(text=html)

    # Prefer the on-page title; fall back to the list-page title.
    title = ''.join(res.xpath('//div[@class="title"]//text()|//div[@class="scroll_wrap"]//h2//text()').extract()).strip()
    if not title:
        title = article_json['title'].strip()
    pub_no = ''.join(res.xpath('//table[@class="m-table-fixed"]//th[contains(text(),"发文文号")]/following::td[1]/text()').extract()).strip()
    pub_no = '' if pub_no == '无' else pub_no
    index_no = ''.join(res.xpath('//table[@class="m-table-fixed"]//th[contains(text(),"索引号")]/following::td[1]/text()').extract()).strip()
    index_no = '' if index_no == '无' else index_no
    legal_status = ''.join(res.xpath('//table[@class="m-table-fixed"]//th[contains(text(),"有效性：")]/following::td[1]//text()').extract()).strip()
    legal_status = '' if legal_status == '无' else legal_status
    # NOTE(review): subject is extracted and cleaned here but never written to
    # `data` below, unlike the sibling ETL callbacks — confirm whether
    # data['subject'] = subject was intended.
    subject = ''.join(res.xpath('//table[@class="m-table-fixed"]//th[contains(text(),"主题分类：")]/following::td[1]/text()').extract()).strip()
    subject = '' if subject == '无' else subject
    organ = res.xpath('//table[@class="m-table-fixed"]//th[contains(text(),"发布机构：")]/following::td[1]/text()').extract()
    organ = organ[0] if organ else ''
    organ = '' if organ == '无' else organ
    # Sites often abbreviate "省..." — prefix the province name.
    if organ.startswith('省'):
        organ = '陕西' + organ

    fulltext_xpath = '//div[@class="scroll_cont"]|//div[@class="content clearfix"]'
    fulltext = res.xpath(fulltext_xpath).extract_first()
    if not fulltext:
        # Fix: give the exception a message so failures are diagnosable in logs.
        raise Exception(f'fulltext not found: {provider_url}')

    down_date_str = time.strftime('%Y%m%d_%H%M%S', time.localtime())
    sub_db_id = '99624'
    rawid = callmodel.sql_model.rawid
    lngid = BaseLngid().GetLngid(sub_db_id, rawid)
    product = "MZTSHAANXI"
    zt_provider = "mztshaanxigovpolicy"
    data = init_data(rawid, lngid, sub_db_id, down_date_str, product, zt_provider)
    print(lngid)  # debug trace

    data['title'] = title
    data['provider_url'] = provider_url
    data['pub_date'] = pub_date
    data['pub_year'] = pub_year
    data['pub_no'] = pub_no
    data['organ'] = organ
    data['index_no'] = index_no
    data['legal_status'] = legal_status

    save_data.append({'table': 'policy_latest', 'data': data})
    full_text_data = init_full_text_data(lngid, sub_db_id, down_date_str, fulltext, pub_year)
    save_data.append({'table': 'policy_fulltext_latest', 'data': full_text_data})
    result.save_data = save_data

    # Persist attachment info back to the task row ("{}" when none found).
    file_info = get_file_info(data, res, f'({fulltext_xpath})')
    di_model_bef = DealUpdateModel()
    if file_info:
        di_model_bef.update.update({"other_dicts": json.dumps(file_info, ensure_ascii=False)})
    else:
        di_model_bef.update.update({"other_dicts": "{}"})
    di_model_bef.where.update({"rawid": callmodel.sql_model.rawid, "task_tag": callmodel.sql_model.task_tag,
                               "task_name": callmodel.sql_model.task_name})
    result.befor_dicts.update_list.append(di_model_bef)
    return result


#   陕西省人力资源和社会保障厅 (Shaanxi Dept. of Human Resources & Social Security)
def policy_rstshaanxilist_callback(callmodel: CallBackModel[PolicyListModel]) -> DealModel:
    """List-page callback for rst.shaanxi.gov.cn (sub_db_id 99626).

    On page 1, reads the total page count ("共<b>N</b>页") and queues the
    remaining list pages; on every page, extracts article url/title/pub_date
    and queues them for the article stage. rawid is the `id=` query parameter.
    """
    result = DealModel()
    para_dicts = callmodel.para_dicts
    task_info = callmodel.redis_all.parse_dict["1_1"].task_info
    info_dicts = {"task_name": callmodel.sql_model.task_name,
                  "task_tag": callmodel.sql_model.task_tag,
                  "task_tag_next": task_info.task_tag_next}
    if "1_1" in para_dicts["data"]:
        # raw string avoids the invalid-escape-sequence warning for \d
        max_count = re.findall(r'共<b>(\d+)</b>页', para_dicts["data"]["1_1"]['html'])
        total_page = int(max_count[0]) if max_count else 1
        page_index = int(callmodel.sql_model.page_index)
        if page_index == 1:
            # Only the first page fans out the remaining list pages.
            sql_dict = callmodel.sql_model.dict()
            di_model_bef = DealInsertModel()
            di_model_bef.insert_pre = CoreSqlValue.insert_ig_it
            sql_dict = deal_sql_dict(sql_dict)
            for page in range(page_index + 1, total_page + 1):
                sql_dict["page"] = total_page
                sql_dict["page_index"] = page
                sql_dict["list_json"] = callmodel.sql_model.list_json
                di_model_bef.lists.append(sql_dict.copy())
            result.befor_dicts.insert.append(di_model_bef)
        di_model_next = DealInsertModel()
        di_model_next.insert_pre = CoreSqlValue.insert_ig_it
        res = Selector(text=para_dicts["data"]["1_1"]['html'])
        li_list = res.xpath('//div[@id="test"]/ul/li|//div[@class="article-list"]/ul/li')
        base_url = 'https://rst.shaanxi.gov.cn/newstyle/pub_newschannel.asp'
        for li in li_list:
            temp = info_dicts.copy()
            temp["task_tag"] = temp["task_tag_next"]
            del temp["task_tag_next"]
            article_json = dict()
            href = li.xpath('a/@href').extract_first()
            if not href:
                # Fix: original passed None to urljoin, raising TypeError.
                continue
            url = parse.urljoin(base_url, href)
            if 'id=' not in url:
                continue
            # Fix: original regex '\?id=(.*?)&' required a trailing '&' and
            # raised IndexError when id was the last query parameter.
            rawid_match = re.findall(r'\?id=([^&]*)', url)
            if not rawid_match:
                continue
            rawid = rawid_match[0]
            temp["rawid"] = rawid
            temp["sub_db_id"] = '99626'
            article_json["url"] = url
            article_json["title"] = li.xpath('a/span/text()').extract_first().strip()
            article_json["pub_date"] = li.xpath('span/text()').extract_first().strip()
            temp["article_json"] = json.dumps(article_json, ensure_ascii=False)
            di_model_next.lists.append(temp)
        result.next_dicts.insert.append(di_model_next)

    return result


def policy_rstshaanxiarticle_callback(callmodel: CallBackModel[PolicyArticleModel]) -> DealModel:
    """No-op article callback: returns an empty DealModel."""
    return DealModel()


def policy_rstshaanxiarticle_etl_callback(callmodel) -> EtlDealModel:
    """ETL callback for rst.shaanxi.gov.cn articles (sub_db_id 99626).

    Parses the crawled article HTML into a metadata row (table policy_latest)
    and a full-text row (table policy_fulltext_latest), and writes attachment
    info (if any) back onto the task row as `other_dicts`.

    Raises:
        Exception: when the full-text container cannot be located.
    """
    result = EtlDealModel()
    save_data = list()

    html = callmodel.para_dicts['data']['1_1']['html']
    article_json = json.loads(callmodel.sql_model.article_json)
    provider_url = article_json['url']
    pub_date = clean_pubdate(article_json['pub_date'])
    pub_year = pub_date[:4]
    res = Selector(text=html)

    # Prefer the on-page title (several page templates); fall back to the
    # list-page title.
    title = ''.join(res.xpath('//div[@class="m-gk-title"]//text()|//p[@class="text-cen"]//text()|//div[@class="title"]//text()|//h1[@class="xx-tit f-tac"]//text()').extract()).strip()
    if not title:
        title = article_json['title'].strip()
    pub_no = ''.join(res.xpath('//div[@class="table"]//td[contains(text(),"发文字号")]/following::td[1]/text()').extract()).strip()
    index_no = ''.join(res.xpath('//div[@class="table"]//td[contains(text(),"索引号")]/following::td[1]/text()').extract()).strip()
    legal_status = ''.join(res.xpath('//div[@class="table"]//td[contains(text(),"效力状态")]/following::td[1]/text()').extract()).strip()
    subject = ''.join(res.xpath('//div[@class="table"]//td[contains(text(),"主题分类")]/following::td[1]/text()').extract()).strip()
    organ = ''.join(res.xpath('//div[@class="table"]//td[contains(text(),"发布机构")]/following::td[1]/text()').extract()).strip()
    # Sites often abbreviate "省..." — prefix the province name.
    if organ.startswith('省'):
        organ = '陕西' + organ

    fulltext_xpath = '//div[@class="content-con"]|//div[@class="text_content"]|//div[@class="scroll_cont"]|//div[@class="szf_lfNewsDetail"]'
    fulltext = res.xpath(fulltext_xpath).extract_first()
    if not fulltext:
        # Fix: give the exception a message so failures are diagnosable in logs.
        raise Exception(f'fulltext not found: {provider_url}')

    down_date_str = time.strftime('%Y%m%d_%H%M%S', time.localtime())
    sub_db_id = '99626'
    rawid = callmodel.sql_model.rawid
    lngid = BaseLngid().GetLngid(sub_db_id, rawid)
    product = "RSTSHAANXI"
    zt_provider = "rstshaanxigovpolicy"
    data = init_data(rawid, lngid, sub_db_id, down_date_str, product, zt_provider)
    print(lngid)  # debug trace

    data['title'] = title
    data['provider_url'] = provider_url
    data['pub_date'] = pub_date
    data['pub_year'] = pub_year
    data['pub_no'] = pub_no
    data['organ'] = organ
    data['index_no'] = index_no
    data['subject'] = subject
    data['legal_status'] = legal_status

    save_data.append({'table': 'policy_latest', 'data': data})
    full_text_data = init_full_text_data(lngid, sub_db_id, down_date_str, fulltext, pub_year)
    save_data.append({'table': 'policy_fulltext_latest', 'data': full_text_data})
    result.save_data = save_data

    # Persist attachment info back to the task row ("{}" when none found).
    file_info = get_file_info(data, res, f'({fulltext_xpath})')
    di_model_bef = DealUpdateModel()
    if file_info:
        di_model_bef.update.update({"other_dicts": json.dumps(file_info, ensure_ascii=False)})
    else:
        di_model_bef.update.update({"other_dicts": "{}"})
    di_model_bef.where.update({"rawid": callmodel.sql_model.rawid, "task_tag": callmodel.sql_model.task_tag,
                               "task_name": callmodel.sql_model.task_name})
    result.befor_dicts.update_list.append(di_model_bef)
    return result


#   陕西省农业农村厅 (Shaanxi Provincial Dept. of Agriculture & Rural Affairs)
def policy_nynctshaanxilist_callback(callmodel: CallBackModel[PolicyListModel]) -> DealModel:
    """List-page callback for nynct.shaanxi.gov.cn (sub_db_id 99627).

    On page 1, takes the last pager <option value="N"> as the total page count
    and queues the remaining list pages with a per-page `list_json`. On every
    page, extracts article url/title/pub_date (plus pub_no from table rows)
    and queues them for the article stage.
    """
    result = DealModel()
    para_dicts = callmodel.para_dicts
    task_info = callmodel.redis_all.parse_dict["1_1"].task_info
    info_dicts = {"task_name": callmodel.sql_model.task_name,
                  "task_tag": callmodel.sql_model.task_tag,
                  "task_tag_next": task_info.task_tag_next}
    if "1_1" in para_dicts["data"]:
        # raw string avoids the invalid-escape-sequence warning for \d;
        # the last <option> in the pager carries the highest page number.
        max_count = re.findall(r'option value="(\d+)', para_dicts["data"]["1_1"]['html'])
        total_page = int(max_count[-1]) if max_count else 1
        page_index = int(callmodel.sql_model.page_index)
        if page_index == 1:
            # Only the first page fans out the remaining list pages.
            sql_dict = callmodel.sql_model.dict()
            di_model_bef = DealInsertModel()
            di_model_bef.insert_pre = CoreSqlValue.insert_ig_it
            sql_dict = deal_sql_dict(sql_dict)
            list_json = json.loads(callmodel.sql_model.list_json)
            for page in range(page_index + 1, total_page + 1):
                sql_dict["page"] = total_page
                sql_dict["page_index"] = page
                # This site paginates via page_info, so each queued row carries
                # its own page suffix.
                dic = {"page_info": f"{list_json['page_info']}_{page}"}
                sql_dict["list_json"] = json.dumps(dic, ensure_ascii=False)
                di_model_bef.lists.append(sql_dict.copy())
            result.befor_dicts.insert.append(di_model_bef)
        di_model_next = DealInsertModel()
        di_model_next.insert_pre = CoreSqlValue.insert_ig_it
        res = Selector(text=para_dicts["data"]["1_1"]['html'])
        li_list = res.xpath('//table[@class="form-content"]/tbody/tr|//div[@class="m-gkzd-list"]/ul/li')
        base_url = 'http://nynct.shaanxi.gov.cn'
        for li in li_list:
            temp = info_dicts.copy()
            temp["task_tag"] = temp["task_tag_next"]
            del temp["task_tag_next"]
            article_json = dict()
            href = li.xpath('td[2]/a/@href|a/@href').extract_first()
            if not href:
                # Fix: original crashed with TypeError on rows without a link
                # (e.g. table header rows).
                continue
            url = href if 'http' in href else base_url + href
            if 'htm' not in url:
                continue
            # rawid = file name without extension
            rawid = re.findall(r'(.*?)\.', url.split('/')[-1])[0]
            temp["rawid"] = rawid
            temp["sub_db_id"] = '99627'
            article_json["url"] = url
            article_json["title"] = li.xpath('td[2]/a/text()|a/text()').extract_first().strip()
            article_json["pub_date"] = li.xpath('td[5]/text()|span/text()').extract_first().strip()
            article_json["pub_no"] = ''.join(li.xpath('td[3]/text()').extract()).strip()
            temp["article_json"] = json.dumps(article_json, ensure_ascii=False)
            di_model_next.lists.append(temp)
        result.next_dicts.insert.append(di_model_next)

    return result


def policy_nynctshaanxiarticle_callback(callmodel: CallBackModel[PolicyArticleModel]) -> DealModel:
    """No-op article callback: returns an empty DealModel."""
    return DealModel()


def policy_nynctshaanxiarticle_etl_callback(callmodel) -> EtlDealModel:
    """ETL for 陕西省农业农村厅 (Shaanxi Dept. of Agriculture and Rural
    Affairs) policy articles.

    Extracts metadata and full text from the downloaded article HTML and
    queues rows for the ``policy_latest`` / ``policy_fulltext_latest``
    tables; attachment info found in the full-text node is written back to
    the source row via ``other_dicts``.

    Raises:
        Exception: when the full-text container cannot be located.
    """
    result = EtlDealModel()
    save_data = list()

    html = callmodel.para_dicts['data']['1_1']['html']
    article_json = json.loads(callmodel.sql_model.article_json)
    provider_url = article_json['url']
    pub_date = clean_pubdate(article_json['pub_date'])
    pub_year = pub_date[:4]
    res = Selector(text=html)

    # Prefer the title rendered on the article page itself; fall back to
    # the title captured from the list page at crawl time.
    title = ''.join(res.xpath('//div[@class="m-gk-title"]//text()|//p[@class="text-cen"]//text()|//div[@class="title"]//text()').extract()).strip()
    if not title:
        title = article_json['title'].strip()
    pub_no = article_json['pub_no']
    index_no = ''.join(res.xpath('//ul[@class="govinfo-lay-detail"]//span[contains(text(),"索引号")]/parent::li[1]/text()').extract()).strip()
    legal_status = ''.join(res.xpath('//ul[@class="govinfo-lay-detail"]//span[contains(text(),"有效性：")]/parent::li[1]/text()').extract()).strip()
    subject = ''.join(res.xpath('//ul[@class="govinfo-lay-detail"]//span[contains(text(),"主题分类：")]/parent::li[1]/text()').extract()).strip()
    written_date = ''.join(res.xpath('//ul[@class="govinfo-lay-detail"]//span[contains(text(),"成文日期：")]/parent::li[1]/text()').extract()).strip()
    organ = ''.join(res.xpath('//ul[@class="govinfo-lay-detail"]//span[contains(text(),"发布机构：")]/parent::li[1]/text()').extract()).strip()
    # The site abbreviates the province ("省..."); restore the full name.
    if organ.startswith('省'):
        organ = '陕西' + organ

    fulltext_xpath = '//div[@class="text"]'
    fulltext = res.xpath(fulltext_xpath).extract_first()
    if not fulltext:
        raise Exception(f'fulltext not found: {provider_url}')

    down_date_str = time.strftime('%Y%m%d_%H%M%S', time.localtime())
    sub_db_id = '99627'
    rawid = callmodel.sql_model.rawid
    lngid = BaseLngid().GetLngid(sub_db_id, rawid)
    product = "NYNCTSHAANXI"
    zt_provider = "nynctshaanxigovpolicy"
    data = init_data(rawid, lngid, sub_db_id, down_date_str, product, zt_provider)
    print(lngid)

    data['title'] = title
    data['provider_url'] = provider_url
    data['pub_date'] = pub_date
    data['pub_year'] = pub_year
    data['pub_no'] = pub_no
    data['organ'] = organ
    data['index_no'] = index_no
    data['written_date'] = clean_pubdate(written_date)
    data['subject'] = subject
    data['legal_status'] = legal_status

    save_data.append({'table': 'policy_latest', 'data': data})
    full_text_data = init_full_text_data(lngid, sub_db_id, down_date_str, fulltext, pub_year)
    save_data.append({'table': 'policy_fulltext_latest', 'data': full_text_data})
    result.save_data = save_data

    # Record any attachments found inside the full-text node back on the
    # source row so the download stage can fetch them.
    file_info = get_file_info(data, res, f'({fulltext_xpath})')
    di_model_bef = DealUpdateModel()
    if file_info:
        di_model_bef.update.update({"other_dicts": json.dumps(file_info, ensure_ascii=False)})
    else:
        di_model_bef.update.update({"other_dicts": "{}"})
    di_model_bef.where.update({"rawid": callmodel.sql_model.rawid, "task_tag": callmodel.sql_model.task_tag,
                               "task_name": callmodel.sql_model.task_name})
    result.befor_dicts.update_list.append(di_model_bef)
    return result


#   陕西省住房和城乡建设厅 (Shaanxi Dept. of Housing and Urban-Rural Development)
def policy_jsshaanxilist_callback(callmodel: CallBackModel[PolicyListModel]) -> DealModel:
    """List-stage callback for the Shaanxi Dept. of Housing and Urban-Rural
    Development.

    On page 1 it fans out the remaining list pages (page count parsed from
    the "共N页" pager text); for every page it extracts article links and
    queues them for the article stage under ``task_tag_next``.
    """
    result = DealModel()
    para_dicts = callmodel.para_dicts
    task_info = callmodel.redis_all.parse_dict["1_1"].task_info
    info_dicts = {"task_name": callmodel.sql_model.task_name,
                  "task_tag": callmodel.sql_model.task_tag,
                  "task_tag_next": task_info.task_tag_next}
    if "1_1" in para_dicts["data"]:
        max_count = re.findall(r'>共(\d+)页<', para_dicts["data"]["1_1"]['html'])
        total_page = int(max_count[0]) if max_count else 1
        page_index = int(callmodel.sql_model.page_index)
        if page_index == 1:
            # Only the first page schedules the remaining list pages.
            sql_dict = callmodel.sql_model.dict()
            di_model_bef = DealInsertModel()
            di_model_bef.insert_pre = CoreSqlValue.insert_ig_it
            sql_dict = deal_sql_dict(sql_dict)
            for page in range(page_index + 1, total_page + 1):
                sql_dict["page"] = total_page
                sql_dict["page_index"] = page
                sql_dict["list_json"] = callmodel.sql_model.list_json
                di_model_bef.lists.append(sql_dict.copy())
            result.befor_dicts.insert.append(di_model_bef)
        di_model_next = DealInsertModel()
        di_model_next.insert_pre = CoreSqlValue.insert_ig_it
        res = Selector(text=para_dicts["data"]["1_1"]['html'])
        li_list = res.xpath('//div[@class="body-short"]/ul/li|//div[@class="newsList"]/ul/li')
        for li in li_list:
            temp = info_dicts.copy()
            temp["task_tag"] = temp["task_tag_next"]
            del temp["task_tag_next"]
            article_json = dict()
            href = li.xpath('a/@href').extract_first()
            if not href:
                # Defensive: skip list entries without a link.
                continue
            base_url = 'https://js.shaanxi.gov.cn'
            url = href if 'http' in href else base_url + href
            if 'htm' not in url:
                continue
            # rawid is the article file name without its extension.
            rawid = re.findall(r'(.*?)\.', url.split('/')[-1])[0]
            temp["rawid"] = rawid
            temp["sub_db_id"] = '99628'
            article_json["url"] = url
            article_json["title"] = li.xpath('a/text()').extract_first().strip()
            article_json["pub_date"] = li.xpath('em/text()|span/text()').extract_first().strip()
            temp["article_json"] = json.dumps(article_json, ensure_ascii=False)
            di_model_next.lists.append(temp)
        result.next_dicts.insert.append(di_model_next)

    return result


def policy_jsshaanxiarticle_callback(callmodel: CallBackModel[PolicyArticleModel]) -> DealModel:
    """Article-stage hook for the Shaanxi Housing Dept site; no extra deal
    steps are required, so an empty DealModel is returned."""
    return DealModel()


def policy_jsshaanxiarticle_etl_callback(callmodel) -> EtlDealModel:
    """ETL for 陕西省住房和城乡建设厅 policy articles.

    Only title/url/pub_date metadata is available for this site; the full
    text is taken from the ``editor-body`` node. Rows are queued for the
    ``policy_latest`` / ``policy_fulltext_latest`` tables and attachment
    info is written back onto the source row via ``other_dicts``.

    Raises:
        Exception: when the full-text container cannot be located.
    """
    result = EtlDealModel()
    save_data = list()

    html = callmodel.para_dicts['data']['1_1']['html']
    article_json = json.loads(callmodel.sql_model.article_json)
    provider_url = article_json['url']
    pub_date = clean_pubdate(article_json['pub_date'])
    pub_year = pub_date[:4]
    res = Selector(text=html)

    # Prefer the on-page title; fall back to the list-page title.
    title = ''.join(res.xpath('//div[@class="details-header"]//h1//text()').extract()).strip()
    if not title:
        title = article_json['title'].strip()

    fulltext_xpath = '//div[@class="editor-body"]'
    fulltext = res.xpath(fulltext_xpath).extract_first()
    if not fulltext:
        raise Exception(f'fulltext not found: {provider_url}')

    down_date_str = time.strftime('%Y%m%d_%H%M%S', time.localtime())
    sub_db_id = '99628'
    rawid = callmodel.sql_model.rawid
    lngid = BaseLngid().GetLngid(sub_db_id, rawid)
    product = "JSSHAANXI"
    zt_provider = "jsshaanxigovpolicy"
    data = init_data(rawid, lngid, sub_db_id, down_date_str, product, zt_provider)
    print(lngid)

    data['title'] = title
    data['provider_url'] = provider_url
    data['pub_date'] = pub_date
    data['pub_year'] = pub_year

    save_data.append({'table': 'policy_latest', 'data': data})
    full_text_data = init_full_text_data(lngid, sub_db_id, down_date_str, fulltext, pub_year)
    save_data.append({'table': 'policy_fulltext_latest', 'data': full_text_data})
    result.save_data = save_data

    # Record attachments found inside the full-text node on the source row.
    file_info = get_file_info(data, res, f'({fulltext_xpath})')
    di_model_bef = DealUpdateModel()
    if file_info:
        di_model_bef.update.update({"other_dicts": json.dumps(file_info, ensure_ascii=False)})
    else:
        di_model_bef.update.update({"other_dicts": "{}"})
    di_model_bef.where.update({"rawid": callmodel.sql_model.rawid, "task_tag": callmodel.sql_model.task_tag,
                               "task_name": callmodel.sql_model.task_name})
    result.befor_dicts.update_list.append(di_model_bef)
    return result


#   陕西省卫生健康委员会 (Shaanxi Provincial Health Commission)
def policy_sxwjwshaanxilist_callback(callmodel: CallBackModel[PolicyListModel]) -> DealModel:
    """List-stage callback for the Shaanxi Provincial Health Commission.

    Page 0 fans out the remaining list pages (page count comes from the
    ``createPage(N`` snippet in the HTML); every page then extracts article
    links and queues them for the article stage under ``task_tag_next``.
    """
    result = DealModel()
    para_dicts = callmodel.para_dicts
    task_info = callmodel.redis_all.parse_dict["1_1"].task_info
    info_dicts = {"task_name": callmodel.sql_model.task_name,
                  "task_tag": callmodel.sql_model.task_tag,
                  "task_tag_next": task_info.task_tag_next}
    if "1_1" in para_dicts["data"]:
        max_count = re.findall(r'createPage\((\d+)', para_dicts["data"]["1_1"]['html'])
        total_page = int(max_count[0]) if max_count else 1
        page_index = int(callmodel.sql_model.page_index)
        if page_index == 0:
            sql_dict = callmodel.sql_model.dict()
            di_model_bef = DealInsertModel()
            di_model_bef.insert_pre = CoreSqlValue.insert_ig_it
            sql_dict = deal_sql_dict(sql_dict)
            list_json = json.loads(callmodel.sql_model.list_json)
            # NOTE(review): range() stops at total_page - 1; correct if the
            # site numbers pages 0..total_page-1 — confirm against the site.
            for page in range(page_index + 1, total_page):
                sql_dict["page"] = total_page
                sql_dict["page_index"] = page
                dic = {"page_info": f"{list_json['page_info']}_{page}"}
                sql_dict["list_json"] = json.dumps(dic, ensure_ascii=False)
                di_model_bef.lists.append(sql_dict.copy())
            result.befor_dicts.insert.append(di_model_bef)
        di_model_next = DealInsertModel()
        di_model_next.insert_pre = CoreSqlValue.insert_ig_it
        res = Selector(text=para_dicts["data"]["1_1"]['html'])
        li_list = res.xpath('//ul[contains(@class,"cm-news-list")]/li')
        for li in li_list:
            temp = info_dicts.copy()
            temp["task_tag"] = temp["task_tag_next"]
            del temp["task_tag_next"]
            article_json = dict()
            href = li.xpath('a/@href').extract_first()
            if not href:
                # Defensive: skip list entries without a link.
                continue
            base_url = f'http://sxwjw.shaanxi.gov.cn/{callmodel.sql_model.list_rawid}/index.html'
            url = parse.urljoin(base_url, href)
            if 'htm' not in url:
                continue
            # rawid is the article file name without its extension.
            rawid = re.findall(r'(.*?)\.', url.split('/')[-1])[0]
            temp["rawid"] = rawid
            temp["sub_db_id"] = '99629'
            article_json["url"] = url
            article_json["title"] = li.xpath('a/text()').extract_first().strip()
            article_json["pub_date"] = li.xpath('span/text()').extract_first().strip()
            temp["article_json"] = json.dumps(article_json, ensure_ascii=False)
            di_model_next.lists.append(temp)
        result.next_dicts.insert.append(di_model_next)

    return result


def policy_sxwjwshaanxiarticle_callback(callmodel: CallBackModel[PolicyArticleModel]) -> DealModel:
    """Article-stage hook for the Shaanxi Health Commission site; no extra
    deal steps are required, so an empty DealModel is returned."""
    return DealModel()


def policy_sxwjwshaanxiarticle_etl_callback(callmodel) -> EtlDealModel:
    """ETL for 陕西省卫生健康委员会 policy articles.

    Pulls metadata from the ``cm-table-fixed zw-table`` info table and the
    full text from the ``news-detail`` node, then queues rows for the
    ``policy_latest`` / ``policy_fulltext_latest`` tables; attachment info
    is written back onto the source row via ``other_dicts``.

    Raises:
        Exception: when the full-text container cannot be located.
    """
    result = EtlDealModel()
    save_data = list()

    html = callmodel.para_dicts['data']['1_1']['html']
    article_json = json.loads(callmodel.sql_model.article_json)
    provider_url = article_json['url']
    pub_date = clean_pubdate(article_json['pub_date'])
    pub_year = pub_date[:4]
    res = Selector(text=html)

    # Prefer the on-page title; fall back to the list-page title.
    title = ''.join(res.xpath('//div[@class="message-box"]//h1//text()').extract()).strip()
    if not title:
        title = article_json['title'].strip()
    pub_no = ''.join(res.xpath('//table[@class="cm-table-fixed zw-table"]//th[contains(text(),"文号")]/following::td[1]/text()').extract()).strip()
    index_no = ''.join(res.xpath('//table[@class="cm-table-fixed zw-table"]//th[contains(text(),"索引号")]/following::td[1]/text()').extract()).strip()
    legal_status = ''.join(res.xpath('//table[@class="cm-table-fixed zw-table"]//th[contains(text(),"效力状态")]/following::td[1]/text()').extract()).strip()
    subject = ''.join(res.xpath('//table[@class="cm-table-fixed zw-table"]//th[contains(text(),"主题分类")]/following::td[1]/text()').extract()).strip()
    # NOTE: the leading space in " 发布机构" matches the site's markup.
    organ = ''.join(res.xpath('//table[@class="cm-table-fixed zw-table"]//th[contains(text()," 发布机构")]/following::td[1]/text()').extract()).strip()
    # The site abbreviates the province ("省..."); restore the full name.
    if organ.startswith('省'):
        organ = '陕西' + organ

    fulltext_xpath = '//div[contains(@class,"news-detail")]'
    fulltext = res.xpath(fulltext_xpath).extract_first()
    if not fulltext:
        raise Exception(f'fulltext not found: {provider_url}')

    down_date_str = time.strftime('%Y%m%d_%H%M%S', time.localtime())
    sub_db_id = '99629'
    rawid = callmodel.sql_model.rawid
    lngid = BaseLngid().GetLngid(sub_db_id, rawid)
    product = "SXWJWSHAANXI"
    zt_provider = "sxwjwshaanxigovpolicy"
    data = init_data(rawid, lngid, sub_db_id, down_date_str, product, zt_provider)
    print(lngid)

    data['title'] = title
    data['provider_url'] = provider_url
    data['pub_date'] = pub_date
    data['pub_year'] = pub_year
    data['pub_no'] = pub_no
    data['organ'] = organ
    data['index_no'] = index_no
    data['subject'] = subject
    data['legal_status'] = legal_status

    save_data.append({'table': 'policy_latest', 'data': data})
    full_text_data = init_full_text_data(lngid, sub_db_id, down_date_str, fulltext, pub_year)
    save_data.append({'table': 'policy_fulltext_latest', 'data': full_text_data})
    result.save_data = save_data

    # Record attachments found inside the full-text node on the source row.
    file_info = get_file_info(data, res, f'({fulltext_xpath})')
    di_model_bef = DealUpdateModel()
    if file_info:
        di_model_bef.update.update({"other_dicts": json.dumps(file_info, ensure_ascii=False)})
    else:
        di_model_bef.update.update({"other_dicts": "{}"})
    di_model_bef.where.update({"rawid": callmodel.sql_model.rawid, "task_tag": callmodel.sql_model.task_tag,
                               "task_name": callmodel.sql_model.task_name})
    result.befor_dicts.update_list.append(di_model_bef)
    return result


#   陕西省西安市 (Xi'an municipal government)
def policy_xalist_callback(callmodel: CallBackModel[PolicyListModel]) -> DealModel:
    """List-stage callback for the Xi'an municipal government site.

    Page 1 fans out the remaining list pages (page count parsed from a
    ``parseInt('N`` snippet; pages above 10 use a different path scheme);
    every page extracts article links and queues them for the article stage.
    """
    result = DealModel()
    para_dicts = callmodel.para_dicts
    task_info = callmodel.redis_all.parse_dict["1_1"].task_info
    info_dicts = {"task_name": callmodel.sql_model.task_name,
                  "task_tag": callmodel.sql_model.task_tag,
                  "task_tag_next": task_info.task_tag_next}
    if "1_1" in para_dicts["data"]:
        max_count = re.findall(r"parseInt\('(\d+)", para_dicts["data"]["1_1"]['html'])
        total_page = int(max_count[0]) if max_count else 1
        page_index = int(callmodel.sql_model.page_index)
        if page_index == 1:
            sql_dict = callmodel.sql_model.dict()
            di_model_bef = DealInsertModel()
            di_model_bef.insert_pre = CoreSqlValue.insert_ig_it
            sql_dict = deal_sql_dict(sql_dict)
            for page in range(page_index + 1, total_page + 1):
                sql_dict["page"] = total_page
                sql_dict["page_index"] = page
                # Pages > 10 live under a "list/<n>" path on this site.
                if page > 10:
                    dic = {"page_info": f"list/{page}"}
                else:
                    dic = {"page_info": f"{page}"}
                sql_dict["list_json"] = json.dumps(dic, ensure_ascii=False)
                di_model_bef.lists.append(sql_dict.copy())
            result.befor_dicts.insert.append(di_model_bef)
        di_model_next = DealInsertModel()
        di_model_next.insert_pre = CoreSqlValue.insert_ig_it
        res = Selector(text=para_dicts["data"]["1_1"]['html'])
        li_list = res.xpath('//table[@class="table table-striped"]/tr|//article[@class="card card-type"]|//div[@class="m-lst36"]/ul/li')
        for li in li_list:
            temp = info_dicts.copy()
            temp["task_tag"] = temp["task_tag_next"]
            del temp["task_tag_next"]
            article_json = dict()
            href = li.xpath('td[1]/a/@href|a/@href').extract_first()
            if not href:
                # Defensive: skip rows without a link (e.g. header rows).
                continue
            base_url = f'http://www.xa.gov.cn/{callmodel.sql_model.list_rawid}/1.html'
            url = parse.urljoin(base_url, href)
            # Keep only on-site article pages.
            if 'htm' not in url or 'xa.' not in url:
                continue
            # rawid is the article file name without its extension.
            rawid = re.findall(r'(.*?)\.', url.split('/')[-1])[0]
            temp["rawid"] = rawid
            temp["sub_db_id"] = '99630'
            article_json["url"] = url
            article_json["title"] = li.xpath('td[1]/a/text()|a/@title|a/text()').extract_first().strip()
            if 'gsgg' in callmodel.sql_model.list_rawid:
                # The 公示公告 list splits the date into year + day nodes.
                if not li.xpath('a/div/div[@class="date"]').extract_first():
                    continue
                day = li.xpath('a/div/div[@class="date"]').extract_first().strip()
                year = li.xpath('a/div/div[@class="year"]').extract_first().strip()
                article_json["pub_date"] = year + day
            else:
                article_json["pub_date"] = li.xpath('td[3]/text()|span/text()').extract_first().strip()
            temp["article_json"] = json.dumps(article_json, ensure_ascii=False)
            di_model_next.lists.append(temp)
        result.next_dicts.insert.append(di_model_next)

    return result


def policy_xaarticle_callback(callmodel: CallBackModel[PolicyArticleModel]) -> DealModel:
    """Article-stage hook for the Xi'an site; no extra deal steps are
    required, so an empty DealModel is returned."""
    return DealModel()


def policy_xaarticle_etl_callback(callmodel) -> EtlDealModel:
    """ETL for Xi'an municipal government policy articles.

    Pulls metadata from the ``file-table`` info block and the full text
    from ``#article``, then queues rows for the ``policy_latest`` /
    ``policy_fulltext_latest`` tables; attachment info is written back
    onto the source row via ``other_dicts``.

    Raises:
        Exception: when the full-text container cannot be located.
    """
    result = EtlDealModel()
    save_data = list()

    html = callmodel.para_dicts['data']['1_1']['html']
    article_json = json.loads(callmodel.sql_model.article_json)
    provider_url = article_json['url']
    pub_date = clean_pubdate(article_json['pub_date'])
    pub_year = pub_date[:4]
    res = Selector(text=html)

    # Prefer the on-page title; fall back to the list-page title.
    title = ''.join(res.xpath('//div[@id="printContent"]/h1[@class="m-txt-tt"]//text()').extract()).strip()
    if not title:
        title = article_json['title'].strip()
    pub_no = ''.join(res.xpath('//div[contains(@class,"file-table")]//td[contains(text(),"发文字号")]/following::td[1]/text()').extract()).strip()
    index_no = ''.join(res.xpath('//div[contains(@class,"file-table")]//td[contains(text(),"索引号")]/following::td[1]/text()').extract()).strip()
    legal_status = ''.join(res.xpath('//div[contains(@class,"file-table")]//td[contains(text(),"有效性")]/following::td[1]/text()').extract()).strip()
    subject = ''.join(res.xpath('//div[contains(@class,"file-table")]//td[contains(text(),"主题分类")]/following::td[1]/text()').extract()).strip()
    written_date = ''.join(res.xpath('//div[contains(@class,"file-table")]//td[contains(text(),"成文日期")]/following::td[1]/text()').extract()).strip()
    impl_date = ''.join(res.xpath('//div[contains(@class,"file-table")]//td[contains(text(),"生效日期")]/following::td[1]/text()').extract()).strip()
    organ = ''.join(res.xpath('//div[contains(@class,"file-table")]//td[contains(text(),"发布机构")]/following::td[1]/text()').extract()).strip()
    # The site abbreviates the city ("市..."); restore the full name.
    if organ.startswith('市'):
        organ = '西安' + organ

    fulltext_xpath = '//div[@id="article"]'
    fulltext = res.xpath(fulltext_xpath).extract_first()
    if not fulltext:
        raise Exception(f'fulltext not found: {provider_url}')

    down_date_str = time.strftime('%Y%m%d_%H%M%S', time.localtime())
    sub_db_id = '99630'
    rawid = callmodel.sql_model.rawid
    lngid = BaseLngid().GetLngid(sub_db_id, rawid)
    product = "XA"
    zt_provider = "xagovpolicy"
    data = init_data(rawid, lngid, sub_db_id, down_date_str, product, zt_provider)
    print(lngid)

    data['title'] = title
    data['provider_url'] = provider_url
    data['pub_date'] = pub_date
    data['pub_year'] = pub_year
    data['pub_no'] = pub_no
    data['organ'] = organ
    data['index_no'] = index_no
    data['written_date'] = clean_pubdate(written_date)
    data['impl_date'] = clean_pubdate(impl_date)
    data['subject'] = subject
    data['legal_status'] = legal_status

    save_data.append({'table': 'policy_latest', 'data': data})
    full_text_data = init_full_text_data(lngid, sub_db_id, down_date_str, fulltext, pub_year)
    save_data.append({'table': 'policy_fulltext_latest', 'data': full_text_data})
    result.save_data = save_data

    # Record attachments found inside the full-text node on the source row.
    file_info = get_file_info(data, res, f'({fulltext_xpath})')
    di_model_bef = DealUpdateModel()
    if file_info:
        di_model_bef.update.update({"other_dicts": json.dumps(file_info, ensure_ascii=False)})
    else:
        di_model_bef.update.update({"other_dicts": "{}"})
    di_model_bef.where.update({"rawid": callmodel.sql_model.rawid, "task_tag": callmodel.sql_model.task_tag,
                               "task_name": callmodel.sql_model.task_name})
    result.befor_dicts.update_list.append(di_model_bef)
    return result


#   陕西省宝鸡市 (Baoji municipal government)
def policy_baojilist_callback(callmodel: CallBackModel[PolicyListModel]) -> DealModel:
    """List-stage callback for the Baoji municipal government site.

    The list endpoint returns XML-ish ``<record>`` entries, 25 per page;
    the total record count comes from ``<totalrecord>``. Page 1 fans out
    requests in batches of 3 pages (start/end record offsets); every
    response's records are queued for the article stage.
    """
    result = DealModel()
    para_dicts = callmodel.para_dicts
    task_info = callmodel.redis_all.parse_dict["1_1"].task_info
    info_dicts = {"task_name": callmodel.sql_model.task_name,
                  "task_tag": callmodel.sql_model.task_tag,
                  "task_tag_next": task_info.task_tag_next}
    if "1_1" in para_dicts["data"]:
        max_count = re.findall(r'<totalrecord>(\d+)</totalrecord>', para_dicts["data"]["1_1"]['html'])
        max_count = int(max_count[0]) if max_count else 1
        total_page = math.ceil(max_count / 25)
        page_index = int(callmodel.sql_model.page_index)
        if page_index == 1:
            sql_dict = callmodel.sql_model.dict()
            di_model_bef = DealInsertModel()
            di_model_bef.insert_pre = CoreSqlValue.insert_ig_it
            sql_dict = deal_sql_dict(sql_dict)
            list_json = json.loads(callmodel.sql_model.list_json)
            # Each scheduled request covers 3 pages (75 records) via
            # start/end record offsets, clamped to the total count.
            for page in range(1, total_page + 1, 3):
                sql_dict["page"] = total_page
                sql_dict["page_index"] = page
                start = (page - 1) * 25 + 1
                end = (page + 2) * 25
                if end >= max_count:
                    end = max_count
                dic = {"start": start, "end": end, "page_info": list_json["page_info"]}
                sql_dict["list_json"] = json.dumps(dic, ensure_ascii=False)
                di_model_bef.lists.append(sql_dict.copy())
            result.befor_dicts.insert.append(di_model_bef)
        di_model_next = DealInsertModel()
        di_model_next.insert_pre = CoreSqlValue.insert_ig_it

        res = Selector(text=para_dicts["data"]["1_1"]['html'])
        li_list = res.xpath('//record')
        for li in li_list:
            temp = info_dicts.copy()
            temp["task_tag"] = temp["task_tag_next"]
            del temp["task_tag_next"]
            article_json = dict()
            href = li.xpath('a/@href').extract_first()
            if not href:
                # Defensive: skip records without a link.
                continue
            base_url = 'http://www.baoji.gov.cn'
            url = href if 'http' in href else base_url + href
            if 'htm' not in url:
                continue
            # rawid is the article file name without its extension.
            rawid = re.findall(r'(.*?)\.', url.split('/')[-1])[0]
            temp["rawid"] = rawid
            temp["sub_db_id"] = '99631'
            article_json["url"] = url
            article_json["title"] = li.xpath('a/text()').extract_first().strip()
            article_json["pub_date"] = li.xpath('span/text()|b/text()').extract_first().strip()
            temp["article_json"] = json.dumps(article_json, ensure_ascii=False)
            di_model_next.lists.append(temp)

        result.next_dicts.insert.append(di_model_next)
    return result


def policy_baojiarticle_callback(callmodel: CallBackModel[PolicyArticleModel]) -> DealModel:
    """Article-stage hook for the Baoji site; no extra deal steps are
    required, so an empty DealModel is returned."""
    return DealModel()


def policy_baojiarticle_etl_callback(callmodel) -> EtlDealModel:
    """ETL for Baoji municipal government policy articles.

    The site serves two article templates — info-disclosure pages (HTML
    contains "bjxxgk") and a generic article template — so the metadata
    xpaths are chosen per template. Rows are queued for the
    ``policy_latest`` / ``policy_fulltext_latest`` tables and attachment
    info is written back onto the source row via ``other_dicts``.

    Raises:
        Exception: when the full-text container cannot be located.
    """
    result = EtlDealModel()
    save_data = list()

    html = callmodel.para_dicts['data']['1_1']['html']
    article_json = json.loads(callmodel.sql_model.article_json)
    provider_url = article_json['url']
    pub_date = clean_pubdate(article_json['pub_date'])
    pub_year = pub_date[:4]
    res = Selector(text=html)
    # Prefer the on-page title; fall back to the list-page title.
    title = ''.join(res.xpath('//div[@class="zwxxgk_ndbgwz"]//h1//text()|//div[@class="article"]//h2//text()').extract()).strip()
    if not title:
        title = article_json['title'].strip()
    if 'bjxxgk' in html:
        # Info-disclosure template: metadata lives in the bjxxgk table.
        pub_no = ''
        index_no = ''.join(res.xpath('//table[@class="bjxxgk"]//td[contains(text(),"索引号:")]/text()').extract()).strip()
        written_date = ''.join(res.xpath('//table[@class="bjxxgk"]//td[contains(text(),"生成日期:")]/text()').extract()).strip()
        legal_status = ''.join(res.xpath('//table[@class="bjxxgk"]//td[contains(text(),"有 效 性：")]/text()').extract()).strip()
        organ = ''.join(res.xpath('//table[@class="bjxxgk"]//td[contains(text(),"发布机构: ")]/text()').extract()).strip()
    else:
        # Generic template: metadata lives in the #table info block.
        pub_no = ''.join(res.xpath('//table[@id="table"]//strong[contains(text(),"文") and contains(text(),"号：")]/parent::td/text()').extract()).strip()
        index_no = ''.join(res.xpath('//table[@id="table"]//strong[contains(text(),"索引号：")]/parent::td/text()').extract()).strip()
        written_date = ''
        legal_status = ''.join(res.xpath('//span[@id="yxx"]/text()').extract()).strip()
        organ = ''.join(res.xpath('//table[@id="table"]//strong[contains(text(),"发布机构：")]/parent::td/text()').extract()).strip()
    # The site abbreviates the city ("市..."); restore the full name.
    if organ.startswith('市'):
        organ = '宝鸡' + organ

    fulltext_xpath = '//div[@class="article"]|//div[@id="Zoom"]'
    fulltext = res.xpath(fulltext_xpath).extract_first()
    if not fulltext:
        raise Exception(f'fulltext not found: {provider_url}')

    down_date_str = time.strftime('%Y%m%d_%H%M%S', time.localtime())
    sub_db_id = '99631'
    rawid = callmodel.sql_model.rawid
    lngid = BaseLngid().GetLngid(sub_db_id, rawid)
    product = "BAOJI"
    zt_provider = "baojigovpolicy"
    data = init_data(rawid, lngid, sub_db_id, down_date_str, product, zt_provider)
    print(lngid)

    data['title'] = title
    data['provider_url'] = provider_url
    # NOTE(review): pub_date was already cleaned above; the second
    # clean_pubdate call is kept as-is — presumed idempotent, confirm.
    data['pub_date'] = clean_pubdate(pub_date)
    data['pub_year'] = pub_year
    data['pub_no'] = pub_no
    data['organ'] = organ
    data['index_no'] = index_no
    data['written_date'] = clean_pubdate(written_date)
    data['legal_status'] = legal_status

    save_data.append({'table': 'policy_latest', 'data': data})
    full_text_data = init_full_text_data(lngid, sub_db_id, down_date_str, fulltext, pub_year)
    save_data.append({'table': 'policy_fulltext_latest', 'data': full_text_data})
    result.save_data = save_data

    # Record attachments found inside the full-text node on the source row.
    file_info = get_file_info(data, res, f'({fulltext_xpath})')
    di_model_bef = DealUpdateModel()
    if file_info:
        di_model_bef.update.update({"other_dicts": json.dumps(file_info, ensure_ascii=False)})
    else:
        di_model_bef.update.update({"other_dicts": "{}"})
    di_model_bef.where.update({"rawid": callmodel.sql_model.rawid, "task_tag": callmodel.sql_model.task_tag,
                               "task_name": callmodel.sql_model.task_name})
    result.befor_dicts.update_list.append(di_model_bef)
    return result


#   陕西省咸阳市 (Xianyang municipal government)
def policy_xianyanglist_callback(callmodel: CallBackModel[PolicyListModel]) -> DealModel:
    """List-stage callback for the Xianyang municipal government site.

    Page 0 fans out the remaining list pages (page count comes from the
    ``createPage(N`` snippet); every page extracts article links and queues
    them for the article stage under ``task_tag_next``.
    """
    result = DealModel()
    para_dicts = callmodel.para_dicts
    task_info = callmodel.redis_all.parse_dict["1_1"].task_info
    info_dicts = {"task_name": callmodel.sql_model.task_name,
                  "task_tag": callmodel.sql_model.task_tag,
                  "task_tag_next": task_info.task_tag_next}
    if "1_1" in para_dicts["data"]:
        max_count = re.findall(r'createPage\((\d+)', para_dicts["data"]["1_1"]['html'])
        total_page = int(max_count[0]) if max_count else 1
        page_index = int(callmodel.sql_model.page_index)
        if page_index == 0:
            sql_dict = callmodel.sql_model.dict()
            di_model_bef = DealInsertModel()
            di_model_bef.insert_pre = CoreSqlValue.insert_ig_it
            sql_dict = deal_sql_dict(sql_dict)
            list_json = json.loads(callmodel.sql_model.list_json)
            # NOTE(review): range() stops at total_page - 1; correct if the
            # site numbers pages 0..total_page-1 — confirm against the site.
            for page in range(page_index + 1, total_page):
                sql_dict["page"] = total_page
                sql_dict["page_index"] = page
                dic = {"page_info": f"{list_json['page_info']}_{page}"}
                sql_dict["list_json"] = json.dumps(dic, ensure_ascii=False)
                di_model_bef.lists.append(sql_dict.copy())
            result.befor_dicts.insert.append(di_model_bef)
        di_model_next = DealInsertModel()
        di_model_next.insert_pre = CoreSqlValue.insert_ig_it
        res = Selector(text=para_dicts["data"]["1_1"]['html'])
        li_list = res.xpath('//ul[@class="xy-gl-list"]/li|//ul[@class="col-list"]/li')
        for li in li_list:
            temp = info_dicts.copy()
            temp["task_tag"] = temp["task_tag_next"]
            del temp["task_tag_next"]
            article_json = dict()
            href = li.xpath('a/@href').extract_first()
            if not href:
                # Defensive: skip list entries without a link.
                continue
            base_url = f'http://www.xianyang.gov.cn/{callmodel.sql_model.list_rawid}/index.html'
            url = parse.urljoin(base_url, href)
            if 'htm' not in url:
                continue
            # rawid is the article file name without its extension.
            rawid = re.findall(r'(.*?)\.', url.split('/')[-1])[0]
            temp["rawid"] = rawid
            temp["sub_db_id"] = '99632'
            article_json["url"] = url
            article_json["title"] = li.xpath('a/text()').extract_first().strip()
            article_json["pub_date"] = li.xpath('span/text()').extract_first().strip()
            temp["article_json"] = json.dumps(article_json, ensure_ascii=False)
            di_model_next.lists.append(temp)
        result.next_dicts.insert.append(di_model_next)

    return result


def policy_xianyangarticle_callback(callmodel: CallBackModel[PolicyArticleModel]) -> DealModel:
    """Article-stage callback for Xianyang; all parsing happens in the ETL step."""
    return DealModel()


def policy_xianyangarticle_etl_callback(callmodel) -> EtlDealModel:
    """ETL callback for Xianyang (咸阳) municipal policy article pages.

    Extracts policy metadata (title, document number, index number,
    legal status, subject, issuing organ) and the full text from the
    downloaded article HTML, queues rows for ``policy_latest`` and
    ``policy_fulltext_latest``, and schedules an update of the source
    row's ``other_dicts`` with any attachment info found in the body.

    Raises:
        Exception: if no full-text container can be located in the page.
    """
    result = EtlDealModel()
    save_data = list()

    html = callmodel.para_dicts['data']['1_1']['html']
    article_json = json.loads(callmodel.sql_model.article_json)
    provider_url = article_json['url']
    pub_date = clean_pubdate(article_json['pub_date'])
    pub_year = pub_date[:4]
    res = Selector(text=html)

    # Prefer the title rendered on the detail page; fall back to the
    # title captured on the list page.
    title = ''.join(res.xpath('//h1[@class="tit"]//text()').extract()).strip()
    if not title:
        title = article_json['title'].strip()
    pub_no = ''.join(res.xpath('//div[@class="xy-xxgk-txt"]//td[contains(text(),"文号")]/following::td[1]/text()').extract()).strip()
    index_no = ''.join(res.xpath('//div[@class="xy-xxgk-txt"]//td[contains(text(),"索引号")]/following::td[1]/text()').extract()).strip()
    legal_status = ''.join(res.xpath('//div[@class="xy-xxgk-txt"]//td[contains(text(),"有效性")]/following::td[1]/text()').extract()).strip()
    subject = ''.join(res.xpath('//div[@class="xy-xxgk-txt"]//td[contains(text(),"主题分类")]/following::td[1]/text()').extract()).strip()
    subject_word = ''.join(res.xpath('//div[@class="xy-xxgk-txt"]//td[contains(text(),"主题词")]/following::td[1]/text()').extract()).strip()
    organ = ''.join(res.xpath('//div[@class="xy-xxgk-txt"]//td[contains(text(),"发布机构")]/following::td[1]/text()').extract()).strip()
    # Bare "市..." organ names get the city prefix for disambiguation.
    if organ.startswith('市'):
        organ = '咸阳' + organ

    # Two known full-text containers; try them in order.
    fulltext_xpath = '//div[contains(@class,"TRS_UEDITOR")]'
    fulltext = res.xpath(fulltext_xpath).extract_first()
    if not fulltext:
        fulltext_xpath = '//div[@class="content"]'
        fulltext = res.xpath(fulltext_xpath).extract_first()
    if not fulltext:
        # Fail loudly with context instead of a bare ``raise Exception``.
        raise Exception(f'xianyang fulltext not found: {provider_url}')

    down_date_str = time.strftime('%Y%m%d_%H%M%S', time.localtime())
    sub_db_id = '99632'
    rawid = callmodel.sql_model.rawid
    lngid = BaseLngid().GetLngid(sub_db_id, rawid)
    product = "XIANYANG"
    zt_provider = "xianyanggovpolicy"
    data = init_data(rawid, lngid, sub_db_id, down_date_str, product, zt_provider)
    print(lngid)

    data['title'] = title
    data['provider_url'] = provider_url
    data['pub_date'] = pub_date
    data['pub_year'] = pub_year
    data['pub_no'] = pub_no
    data['organ'] = organ
    data['index_no'] = index_no
    data['subject'] = subject
    data['subject_word'] = subject_word
    data['legal_status'] = legal_status

    save_data.append({'table': 'policy_latest', 'data': data})
    full_text_data = init_full_text_data(lngid, sub_db_id, down_date_str, fulltext, pub_year)
    save_data.append({'table': 'policy_fulltext_latest', 'data': full_text_data})
    result.save_data = save_data

    # Record attachment info (if any) back onto the source row.
    file_info = get_file_info(data, res, f'({fulltext_xpath})')
    di_model_bef = DealUpdateModel()
    if file_info:
        di_model_bef.update.update({"other_dicts": json.dumps(file_info, ensure_ascii=False)})
    else:
        di_model_bef.update.update({"other_dicts": "{}"})
    di_model_bef.where.update({"rawid": callmodel.sql_model.rawid, "task_tag": callmodel.sql_model.task_tag,
                               "task_name": callmodel.sql_model.task_name})
    result.befor_dicts.update_list.append(di_model_bef)
    return result


#   Tongchuan, Shaanxi Province (陕西省铜川市)
def policy_tongchuanlist_callback(callmodel: CallBackModel[PolicyListModel]) -> DealModel:
    """List-page callback for Tongchuan municipal policy documents.

    On page one it derives the total page count from the page HTML and
    schedules the remaining list pages; on every page it extracts the
    article links and queues them for the article stage.
    """
    result = DealModel()
    para_dicts = callmodel.para_dicts
    task_info = callmodel.redis_all.parse_dict["1_1"].task_info
    info_dicts = {"task_name": callmodel.sql_model.task_name,
                  "task_tag": callmodel.sql_model.task_tag,
                  "task_tag_next": task_info.task_tag_next}
    if "1_1" in para_dicts["data"]:
        max_count = re.findall(r'，共<.*?>(\d+)', para_dicts["data"]["1_1"]['html'])
        max_count = int(max_count[0]) if max_count else 1
        total_page = max_count
        page_index = int(callmodel.sql_model.page_index)
        if page_index == 1:
            # Only the first page fans out the remaining pagination.
            sql_dict = callmodel.sql_model.dict()
            di_model_bef = DealInsertModel()
            di_model_bef.insert_pre = CoreSqlValue.insert_ig_it
            sql_dict = deal_sql_dict(sql_dict)
            list_json = json.loads(callmodel.sql_model.list_json)  # NOTE(review): value unused; kept only as a validity check
            for page in range(page_index + 1, total_page + 1):
                sql_dict["page"] = total_page
                sql_dict["page_index"] = page
                sql_dict["list_json"] = callmodel.sql_model.list_json
                di_model_bef.lists.append(sql_dict.copy())
            result.befor_dicts.insert.append(di_model_bef)
        di_model_next = DealInsertModel()
        di_model_next.insert_pre = CoreSqlValue.insert_ig_it
        res = Selector(text=para_dicts["data"]["1_1"]['html'])
        li_list = res.xpath('//table[@class="result-table"]/tbody/tr|//div[@class="list"]/ul/li|//div[@class="zw_list"]/ul/li')
        for li in li_list:
            temp = info_dicts.copy()
            temp["task_tag"] = temp["task_tag_next"]
            del temp["task_tag_next"]
            article_json = dict()
            href = li.xpath('td[1]/a/@href|a/@href').extract_first()
            if not href:
                # Row without a link (e.g. a header row) — skip it instead
                # of crashing on ``'http' in None``.
                continue
            base_url = 'http://www.tongchuan.gov.cn'
            url = href if 'http' in href else base_url + href
            if 'htm' not in url:
                continue
            rawid = re.findall(r'(.*?)\.', url.split('/')[-1])[0]
            temp["rawid"] = rawid
            temp["sub_db_id"] = '99633'
            article_json["url"] = url
            article_json["title"] = li.xpath('td[1]/a/text()|a/b/text()').extract_first().strip()
            article_json["pub_date"] = li.xpath('td[@class="performDate"]/text()|span/text()').extract_first().strip()
            temp["article_json"] = json.dumps(article_json, ensure_ascii=False)
            di_model_next.lists.append(temp)
        result.next_dicts.insert.append(di_model_next)

    return result


def policy_tongchuanarticle_callback(callmodel: CallBackModel[PolicyArticleModel]) -> DealModel:
    """Article-stage callback for Tongchuan; all parsing happens in the ETL step."""
    return DealModel()


def policy_tongchuanarticle_etl_callback(callmodel) -> EtlDealModel:
    """ETL callback for Tongchuan (铜川) municipal policy article pages.

    Extracts policy metadata and the full text from the downloaded
    article HTML, queues rows for ``policy_latest`` and
    ``policy_fulltext_latest``, and schedules an update of the source
    row's ``other_dicts`` with any attachment info found in the body.

    Raises:
        Exception: if no full-text container can be located in the page.
    """
    result = EtlDealModel()
    save_data = list()

    html = callmodel.para_dicts['data']['1_1']['html']
    article_json = json.loads(callmodel.sql_model.article_json)
    provider_url = article_json['url']
    pub_date = clean_pubdate(article_json['pub_date'])
    pub_year = pub_date[:4]
    res = Selector(text=html)

    # Prefer the title rendered on the detail page; fall back to the
    # title captured on the list page.
    title = ''.join(res.xpath('//div[@class="zw_content_title"]//text()|//div[@class="ny-main"]//h1//text()').extract()).strip()
    if not title:
        title = article_json['title'].strip()
    pub_no = ''.join(res.xpath('//div[@class="zwwh"]//td[contains(text(),"发文文号")]/following::td[1]/text()').extract()).strip()
    index_no = ''.join(res.xpath('//div[@class="zwwh"]//td[contains(text(),"索引号")]/following::td[1]/text()').extract()).strip()
    legal_status = ''.join(res.xpath('//div[@class="zwwh"]//td[contains(text(),"公文时效")]/following::td[1]//text()').extract()).strip()
    subject = ''.join(res.xpath('//div[@class="zwwh"]//td[contains(text(),"主题分类")]/following::td[1]/text()').extract()).strip()
    written_date = ''.join(res.xpath('//div[@class="zwwh"]//td[contains(text(),"发文日期")]/following::td[1]/text()').extract()).strip()
    organ = ''.join(res.xpath('//div[@class="zwwh"]//td[contains(text(),"发布机构")]/following::td[1]/text()').extract()).strip()
    # Bare "市..." organ names get the city prefix for disambiguation.
    if organ.startswith('市'):
        organ = '铜川' + organ

    fulltext_xpath = '//div[@id="zoom"]'
    fulltext = res.xpath(fulltext_xpath).extract_first()
    if not fulltext:
        # Fail loudly with context instead of a bare ``raise Exception``.
        raise Exception(f'tongchuan fulltext not found: {provider_url}')

    down_date_str = time.strftime('%Y%m%d_%H%M%S', time.localtime())
    sub_db_id = '99633'
    rawid = callmodel.sql_model.rawid
    lngid = BaseLngid().GetLngid(sub_db_id, rawid)
    product = "TONGCHUAN"
    zt_provider = "tongchuangovpolicy"
    data = init_data(rawid, lngid, sub_db_id, down_date_str, product, zt_provider)
    print(lngid)

    data['title'] = title
    data['provider_url'] = provider_url
    data['pub_date'] = pub_date
    data['pub_year'] = pub_year
    data['pub_no'] = pub_no
    data['organ'] = organ
    data['index_no'] = index_no
    data['written_date'] = clean_pubdate(written_date)
    data['subject'] = subject
    data['legal_status'] = legal_status

    save_data.append({'table': 'policy_latest', 'data': data})
    full_text_data = init_full_text_data(lngid, sub_db_id, down_date_str, fulltext, pub_year)
    save_data.append({'table': 'policy_fulltext_latest', 'data': full_text_data})
    result.save_data = save_data

    # Record attachment info (if any) back onto the source row.
    file_info = get_file_info(data, res, f'({fulltext_xpath})')
    di_model_bef = DealUpdateModel()
    if file_info:
        di_model_bef.update.update({"other_dicts": json.dumps(file_info, ensure_ascii=False)})
    else:
        di_model_bef.update.update({"other_dicts": "{}"})
    di_model_bef.where.update({"rawid": callmodel.sql_model.rawid, "task_tag": callmodel.sql_model.task_tag,
                               "task_name": callmodel.sql_model.task_name})
    result.befor_dicts.update_list.append(di_model_bef)
    return result


#   Weinan, Shaanxi Province (陕西省渭南市)
def policy_weinanlist_callback(callmodel: CallBackModel[PolicyListModel]) -> DealModel:
    """List-page callback for Weinan municipal policy documents.

    On page one it derives the total page count from the page HTML and
    schedules the remaining list pages; on every page it extracts the
    article links and queues them for the article stage.
    """
    result = DealModel()
    para_dicts = callmodel.para_dicts
    task_info = callmodel.redis_all.parse_dict["1_1"].task_info
    info_dicts = {"task_name": callmodel.sql_model.task_name,
                  "task_tag": callmodel.sql_model.task_tag,
                  "task_tag_next": task_info.task_tag_next}
    if "1_1" in para_dicts["data"]:
        max_count = re.findall(r'>共(\d+)页<', para_dicts["data"]["1_1"]['html'])
        max_count = int(max_count[0]) if max_count else 1
        total_page = max_count
        page_index = int(callmodel.sql_model.page_index)
        if page_index == 1:
            # Only the first page fans out the remaining pagination.
            sql_dict = callmodel.sql_model.dict()
            di_model_bef = DealInsertModel()
            di_model_bef.insert_pre = CoreSqlValue.insert_ig_it
            sql_dict = deal_sql_dict(sql_dict)
            list_json = json.loads(callmodel.sql_model.list_json)  # NOTE(review): value unused; kept only as a validity check
            for page in range(page_index + 1, total_page + 1):
                sql_dict["page"] = total_page
                sql_dict["page_index"] = page
                sql_dict["list_json"] = callmodel.sql_model.list_json
                di_model_bef.lists.append(sql_dict.copy())
            result.befor_dicts.insert.append(di_model_bef)
        di_model_next = DealInsertModel()
        di_model_next.insert_pre = CoreSqlValue.insert_ig_it
        res = Selector(text=para_dicts["data"]["1_1"]['html'])
        # Two known list layouts: a "table" of divs, or a plain ul/li list.
        li_list = res.xpath('//div[@class="table"]/div')
        if not li_list:
            li_list = res.xpath('//div[@class="list_box"]//div[contains(@class,"m-lst")]/ul/li')
        for li in li_list:
            temp = info_dicts.copy()
            temp["task_tag"] = temp["task_tag_next"]
            del temp["task_tag_next"]
            article_json = dict()
            href = li.xpath('div[1]/a/@href|a/@href').extract_first()
            if not href:
                # Row without a link (e.g. a header row) — skip it.
                continue
            base_url = f'http://www.weinan.gov.cn/{callmodel.sql_model.list_rawid}/1.html'
            url = parse.urljoin(base_url, href)
            if 'htm' not in url:
                continue
            rawid = re.findall(r'(.*?)\.', url.split('/')[-1])[0]
            temp["rawid"] = rawid
            temp["sub_db_id"] = '99634'
            article_json["url"] = url
            article_json["title"] = li.xpath('div[1]/a/text()|a/text()').extract_first().strip()
            article_json["pub_date"] = li.xpath('div[3]/text()|span/text()').extract_first().strip()
            temp["article_json"] = json.dumps(article_json, ensure_ascii=False)
            di_model_next.lists.append(temp)
        result.next_dicts.insert.append(di_model_next)

    return result


def policy_weinanarticle_callback(callmodel: CallBackModel[PolicyArticleModel]) -> DealModel:
    """Article-stage callback for Weinan; all parsing happens in the ETL step."""
    return DealModel()


def policy_weinanarticle_etl_callback(callmodel) -> EtlDealModel:
    """ETL callback for Weinan (渭南) municipal policy article pages.

    Handles the two known detail-page layouts (the div-based "table"
    metadata block and the older "contentTable" HTML table), extracts
    policy metadata and the full text, and queues rows for
    ``policy_latest`` / ``policy_fulltext_latest``.

    Raises:
        Exception: if no full-text container can be located in the page.
    """
    result = EtlDealModel()
    save_data = list()

    html = callmodel.para_dicts['data']['1_1']['html']
    article_json = json.loads(callmodel.sql_model.article_json)
    provider_url = article_json['url']
    pub_date = clean_pubdate(article_json['pub_date'])
    pub_year = pub_date[:4]
    res = Selector(text=html)

    # FIX: the original expression ended with a dangling "|", which is an
    # invalid XPath union and made the selector raise on every page.
    title = ''.join(res.xpath('//div[@class="infoTitle"]//text()|//h1[@class="m-txt-tt"]/text()').extract()).strip()
    if not title:
        title = article_json['title'].strip()
    if 'class="table"' in html:
        pub_no = ''.join(res.xpath('//div[@class="table"]//div[contains(text(),"发文字号")]/following::div[1]/text()').extract()).strip()
        index_no = ''.join(res.xpath('//div[@class="table"]//div[contains(text(),"索")]/following::div[1]/text()').extract()).strip()
        legal_status = ''.join(res.xpath('//div[@class="table"]//div[contains(text(),"有")]/following::div[1]/text()').extract()).strip()
        subject_word = ''
        written_date = ''.join(res.xpath('//div[@class="table"]//div[contains(text(),"成文日期")]/following::div[1]/text()').extract()).strip()
        organ = ''.join(res.xpath('//div[@class="table"]//div[contains(text(),"发布机构")]/following::div[1]/text()').extract()).strip()
    else:
        pub_no = ''.join(res.xpath('//table[@class="contentTable"]//td[text()="文"]/following::td[1]/text()').extract()).strip()
        index_no = ''.join(res.xpath('//table[@class="contentTable"]//td[text()="索"]/following::td[1]/text()').extract()).strip()
        legal_status = ''.join(res.xpath('//table[@class="contentTable"]//td[text()="有"]/following::td[1]/text()').extract()).strip()
        subject_word = ''.join(res.xpath('//table[@class="contentTable"]//td[text()="主"]/following::td[1]/text()').extract()).strip()
        written_date = ''.join(res.xpath('//table[@class="contentTable"]//td[text()="签发日期"]/following::td[1]/text()').extract()).strip()
        organ = ''.join(res.xpath('//table[@class="contentTable"]//td[text()="发布机构"]/following::td[1]/text()').extract()).strip()
    # Bare "市..." organ names get the city prefix for disambiguation.
    if organ.startswith('市'):
        organ = '渭南' + organ

    fulltext_xpath = '//div[@class="infoContent"]|//div[@class="m-gk-artcle gz-article"]|//div[@id="article"]'
    fulltext = res.xpath(fulltext_xpath).extract_first()
    if not fulltext:
        # Fail loudly with context instead of a bare ``raise Exception``.
        raise Exception(f'weinan fulltext not found: {provider_url}')

    down_date_str = time.strftime('%Y%m%d_%H%M%S', time.localtime())
    sub_db_id = '99634'
    rawid = callmodel.sql_model.rawid
    lngid = BaseLngid().GetLngid(sub_db_id, rawid)
    product = "WEINAN"
    zt_provider = "weinangovpolicy"
    data = init_data(rawid, lngid, sub_db_id, down_date_str, product, zt_provider)
    print(lngid)

    data['title'] = title
    data['provider_url'] = provider_url
    data['pub_date'] = pub_date
    data['pub_year'] = pub_year
    data['pub_no'] = pub_no
    data['organ'] = organ
    data['index_no'] = index_no
    data['written_date'] = clean_pubdate(written_date)
    data['subject_word'] = subject_word
    data['legal_status'] = legal_status

    save_data.append({'table': 'policy_latest', 'data': data})
    full_text_data = init_full_text_data(lngid, sub_db_id, down_date_str, fulltext, pub_year)
    save_data.append({'table': 'policy_fulltext_latest', 'data': full_text_data})
    result.save_data = save_data

    # Record attachment info (if any) back onto the source row.
    file_info = get_file_info(data, res, f'({fulltext_xpath})')
    di_model_bef = DealUpdateModel()
    if file_info:
        di_model_bef.update.update({"other_dicts": json.dumps(file_info, ensure_ascii=False)})
    else:
        di_model_bef.update.update({"other_dicts": "{}"})
    di_model_bef.where.update({"rawid": callmodel.sql_model.rawid, "task_tag": callmodel.sql_model.task_tag,
                               "task_name": callmodel.sql_model.task_name})
    result.befor_dicts.update_list.append(di_model_bef)
    return result


#   Yan'an, Shaanxi Province (陕西省延安市)
def policy_yananlist_callback(callmodel: CallBackModel[PolicyListModel]) -> DealModel:
    """List-page callback for Yan'an municipal policy documents.

    On page one it derives the total page count from the page HTML and
    schedules the remaining list pages; on every page it extracts the
    article links from whichever of the two known list layouts the page
    uses and queues them for the article stage.
    """
    result = DealModel()
    para_dicts = callmodel.para_dicts
    task_info = callmodel.redis_all.parse_dict["1_1"].task_info
    info_dicts = {"task_name": callmodel.sql_model.task_name,
                  "task_tag": callmodel.sql_model.task_tag,
                  "task_tag_next": task_info.task_tag_next}
    if "1_1" in para_dicts["data"]:
        max_count = re.findall(r"maxPage = parseInt\('(\d+)", para_dicts["data"]["1_1"]['html'])
        max_count = int(max_count[0]) if max_count else 1
        total_page = max_count
        page_index = int(callmodel.sql_model.page_index)
        if page_index == 1:
            # Only the first page fans out the remaining pagination.
            sql_dict = callmodel.sql_model.dict()
            di_model_bef = DealInsertModel()
            di_model_bef.insert_pre = CoreSqlValue.insert_ig_it
            sql_dict = deal_sql_dict(sql_dict)
            list_json = json.loads(callmodel.sql_model.list_json)  # NOTE(review): value unused; kept only as a validity check
            for page in range(page_index + 1, total_page + 1):
                sql_dict["page"] = total_page
                sql_dict["page_index"] = page
                sql_dict["list_json"] = callmodel.sql_model.list_json
                di_model_bef.lists.append(sql_dict.copy())
            result.befor_dicts.insert.append(di_model_bef)
        di_model_next = DealInsertModel()
        di_model_next.insert_pre = CoreSqlValue.insert_ig_it
        res = Selector(text=para_dicts["data"]["1_1"]['html'])
        # The two list layouts share the same item handling and differ only
        # in their xpaths, so pick the xpaths per layout and run one loop.
        if '/zc/' in callmodel.sql_model.list_rawid:
            li_list = res.xpath('//div[@class="table"]/div')[1:]  # skip first row (presumably a header)
            href_xp, title_xp, date_xp = 'div[1]/a/@href', 'div[1]/a/text()', 'div[3]/text()'
        else:
            li_list = res.xpath('//div[@class="m-lst42"]/ul/li|//div[@class="m-lst"]/ul/li')
            href_xp, title_xp, date_xp = 'a/@href', 'a/text()', 'span/text()'
        base_url = f'http://www.yanan.gov.cn/{callmodel.sql_model.list_rawid}/1.html'
        for li in li_list:
            temp = info_dicts.copy()
            temp["task_tag"] = temp["task_tag_next"]
            del temp["task_tag_next"]
            article_json = dict()
            href = li.xpath(href_xp).extract_first()
            url = parse.urljoin(base_url, href)
            if 'htm' not in url:
                continue
            rawid = re.findall(r'(.*?)\.', url.split('/')[-1])[0]
            temp["rawid"] = rawid
            temp["sub_db_id"] = '99635'
            article_json["url"] = url
            article_json["title"] = li.xpath(title_xp).extract_first().strip()
            article_json["pub_date"] = li.xpath(date_xp).extract_first().strip()
            temp["article_json"] = json.dumps(article_json, ensure_ascii=False)
            di_model_next.lists.append(temp)
        result.next_dicts.insert.append(di_model_next)

    return result


def policy_yananarticle_callback(callmodel: CallBackModel[PolicyArticleModel]) -> DealModel:
    """Article-stage callback for Yan'an; all parsing happens in the ETL step."""
    return DealModel()


def policy_yananarticle_etl_callback(callmodel) -> EtlDealModel:
    """ETL callback for Yan'an (延安) municipal policy article pages.

    Extracts policy metadata and the full text from the downloaded
    article HTML, queues rows for ``policy_latest`` and
    ``policy_fulltext_latest``, and schedules an update of the source
    row's ``other_dicts`` with any attachment info found in the body.

    Raises:
        Exception: if no full-text container can be located in the page.
    """
    result = EtlDealModel()
    save_data = list()

    html = callmodel.para_dicts['data']['1_1']['html']
    article_json = json.loads(callmodel.sql_model.article_json)
    provider_url = article_json['url']
    pub_date = clean_pubdate(article_json['pub_date'])
    pub_year = pub_date[:4]
    res = Selector(text=html)

    # Prefer the title rendered on the detail page; fall back to the
    # title captured on the list page.
    title = ''.join(res.xpath('//h1[@class="m-txt-tt"]//text()').extract()).strip()
    if not title:
        title = article_json['title'].strip()
    pub_no = ''.join(res.xpath('//div[@class="table-tr"]//div[contains(text(),"发文字号")]/following::div[1]/text()').extract()).strip()
    index_no = ''.join(res.xpath('//div[@class="table-tr"]//div[contains(text(),"索") and contains(text(),"号")]/following::div[1]/text()').extract()).strip()
    legal_status = ''.join(res.xpath('//div[@class="table-tr"]//div[contains(text(),"有") and contains(text(),"性")]/following::div[1]/text()').extract()).strip()
    # FIX: the original xpath began with "///div", which is an invalid
    # XPath expression and made the selector raise on every page.
    written_date = ''.join(res.xpath('//div[@class="table-tr"]//div[contains(text(),"成文日期")]/following::div[1]/text()').extract()).strip()
    organ = ''.join(res.xpath('//div[@class="table-tr"]//div[contains(text(),"发布机构")]/following::div[1]/text()').extract()).strip()
    # Bare "市..." organ names get the city prefix for disambiguation.
    if organ.startswith('市'):
        organ = '延安' + organ

    fulltext_xpath = '//div[@id="article"]'
    fulltext = res.xpath(fulltext_xpath).extract_first()
    if not fulltext:
        # Fail loudly with context instead of a bare ``raise Exception``.
        raise Exception(f'yanan fulltext not found: {provider_url}')

    down_date_str = time.strftime('%Y%m%d_%H%M%S', time.localtime())
    sub_db_id = '99635'
    rawid = callmodel.sql_model.rawid
    lngid = BaseLngid().GetLngid(sub_db_id, rawid)
    product = "YANAN"
    zt_provider = "yanangovpolicy"
    data = init_data(rawid, lngid, sub_db_id, down_date_str, product, zt_provider)
    print(lngid)

    data['title'] = title
    data['provider_url'] = provider_url
    data['pub_date'] = pub_date
    data['pub_year'] = pub_year
    data['pub_no'] = pub_no
    data['organ'] = organ
    data['index_no'] = index_no
    data['written_date'] = clean_pubdate(written_date)
    data['legal_status'] = legal_status

    save_data.append({'table': 'policy_latest', 'data': data})
    full_text_data = init_full_text_data(lngid, sub_db_id, down_date_str, fulltext, pub_year)
    save_data.append({'table': 'policy_fulltext_latest', 'data': full_text_data})
    result.save_data = save_data

    # Record attachment info (if any) back onto the source row.
    file_info = get_file_info(data, res, f'({fulltext_xpath})')
    di_model_bef = DealUpdateModel()
    if file_info:
        di_model_bef.update.update({"other_dicts": json.dumps(file_info, ensure_ascii=False)})
    else:
        di_model_bef.update.update({"other_dicts": "{}"})
    di_model_bef.where.update({"rawid": callmodel.sql_model.rawid, "task_tag": callmodel.sql_model.task_tag,
                               "task_name": callmodel.sql_model.task_name})
    result.befor_dicts.update_list.append(di_model_bef)
    return result


#   Yulin, Shaanxi Province (陕西省榆林市)
def policy_yllist_callback(callmodel: CallBackModel[PolicyListModel]) -> DealModel:
    """List-page callback for Yulin municipal policy documents.

    On page one it derives the total page count from the page HTML and
    schedules the remaining list pages; on every page it extracts the
    article links from whichever of the two known list layouts the page
    uses and queues them for the article stage.
    """
    result = DealModel()
    para_dicts = callmodel.para_dicts
    task_info = callmodel.redis_all.parse_dict["1_1"].task_info
    info_dicts = {"task_name": callmodel.sql_model.task_name,
                  "task_tag": callmodel.sql_model.task_tag,
                  "task_tag_next": task_info.task_tag_next}
    if "1_1" in para_dicts["data"]:
        max_count = re.findall(r'>共(\d+)页<', para_dicts["data"]["1_1"]['html'])
        max_count = int(max_count[0]) if max_count else 1
        total_page = max_count
        page_index = int(callmodel.sql_model.page_index)
        if page_index == 1:
            # Only the first page fans out the remaining pagination.
            sql_dict = callmodel.sql_model.dict()
            di_model_bef = DealInsertModel()
            di_model_bef.insert_pre = CoreSqlValue.insert_ig_it
            sql_dict = deal_sql_dict(sql_dict)
            list_json = json.loads(callmodel.sql_model.list_json)  # NOTE(review): value unused; kept only as a validity check
            for page in range(page_index + 1, total_page + 1):
                sql_dict["page"] = total_page
                sql_dict["page_index"] = page
                sql_dict["list_json"] = callmodel.sql_model.list_json
                di_model_bef.lists.append(sql_dict.copy())
            result.befor_dicts.insert.append(di_model_bef)
        di_model_next = DealInsertModel()
        di_model_next.insert_pre = CoreSqlValue.insert_ig_it
        res = Selector(text=para_dicts["data"]["1_1"]['html'])
        # The two list layouts share the same item handling and differ only
        # in their xpaths, so pick the xpaths per layout and run one loop.
        if 'tm_id=371' in callmodel.sql_model.list_rawid:
            li_list = res.xpath('//ul[@id="zfwjlist"]/li')
            href_xp = 'span[@class="wenj-title"]/a/@href'
            title_xp = 'span[@class="wenj-title"]/a/text()'
            date_xp = 'span[@class="wenj-time"]/text()'
        else:
            li_list = res.xpath('//div[@class="u-list"]/ul/li|//div[@class="m-gkzd-list"]/ul/li')
            href_xp, title_xp, date_xp = 'a/@href', 'a/text()', 'span/text()'
        base_url = 'http://www.yl.gov.cn'
        for li in li_list:
            temp = info_dicts.copy()
            temp["task_tag"] = temp["task_tag_next"]
            del temp["task_tag_next"]
            article_json = dict()
            href = li.xpath(href_xp).extract_first()
            if not href:
                # Item without a link — skip it instead of crashing on
                # ``'http' in None``.
                continue
            url = href if 'http' in href else base_url + href
            if 'htm' not in url:
                continue
            rawid = re.findall(r'(.*?)\.', url.split('/')[-1])[0]
            temp["rawid"] = rawid
            temp["sub_db_id"] = '99636'
            article_json["url"] = url
            article_json["title"] = li.xpath(title_xp).extract_first().strip()
            article_json["pub_date"] = li.xpath(date_xp).extract_first().strip()
            temp["article_json"] = json.dumps(article_json, ensure_ascii=False)
            di_model_next.lists.append(temp)
        result.next_dicts.insert.append(di_model_next)

    return result


def policy_ylarticle_callback(callmodel: CallBackModel[PolicyArticleModel]) -> DealModel:
    """Article-stage callback for Yulin; all parsing happens in the ETL step."""
    return DealModel()


def policy_ylarticle_etl_callback(callmodel) -> EtlDealModel:
    """ETL callback for Yulin (榆林) municipal policy article pages.

    Extracts policy metadata and the full text from the downloaded
    article HTML, queues rows for ``policy_latest`` and
    ``policy_fulltext_latest``, and schedules an update of the source
    row's ``other_dicts`` with any attachment info found in the body.

    Raises:
        Exception: if no full-text container can be located in the page.
    """
    result = EtlDealModel()
    save_data = list()

    html = callmodel.para_dicts['data']['1_1']['html']
    article_json = json.loads(callmodel.sql_model.article_json)
    provider_url = article_json['url']
    pub_date = clean_pubdate(article_json['pub_date'])
    pub_year = pub_date[:4]
    res = Selector(text=html)

    # Prefer the title rendered on the detail page; fall back to the
    # title captured on the list page.
    title = ''.join(res.xpath('//div[@class="m-ct-tt"]//text()').extract()).strip()
    if not title:
        title = article_json['title'].strip()
    pub_no = ''.join(res.xpath('//div[@class="info_head"]//td[contains(text(),"发文字号")]/following::td[1]/text()').extract()).strip()
    index_no = ''.join(res.xpath('//div[@class="info_head"]//td[contains(text(),"索引号")]/following::td[1]/text()').extract()).strip()
    # Validity is embedded in a JS variable, not in the metadata table.
    legal_status = re.findall('gk_validity = "(.*?)"', html)
    legal_status = legal_status[0].strip() if legal_status else ''
    written_date = ''.join(res.xpath('//div[@class="info_head"]//td[contains(text(),"成文日期")]/following::td[1]/text()').extract()).strip()
    organ = ''.join(res.xpath('//div[@class="info_head"]//td[contains(text(),"发布机构")]/following::td[1]/text()').extract()).strip()
    # Bare "市..." organ names get the city prefix for disambiguation.
    if organ.startswith('市'):
        organ = '榆林' + organ

    fulltext_xpath = '//div[@class="m-ct-artcle"]'
    fulltext = res.xpath(fulltext_xpath).extract_first()
    if not fulltext:
        # Fail loudly with context instead of a bare ``raise Exception``.
        raise Exception(f'yulin fulltext not found: {provider_url}')

    down_date_str = time.strftime('%Y%m%d_%H%M%S', time.localtime())
    sub_db_id = '99636'
    rawid = callmodel.sql_model.rawid
    lngid = BaseLngid().GetLngid(sub_db_id, rawid)
    product = "YL"
    zt_provider = "ylgovpolicy"
    data = init_data(rawid, lngid, sub_db_id, down_date_str, product, zt_provider)
    print(lngid)

    data['title'] = title
    data['provider_url'] = provider_url
    data['pub_date'] = pub_date
    data['pub_year'] = pub_year
    data['pub_no'] = pub_no
    data['organ'] = organ
    data['index_no'] = index_no
    data['written_date'] = clean_pubdate(written_date)
    data['legal_status'] = legal_status

    save_data.append({'table': 'policy_latest', 'data': data})
    full_text_data = init_full_text_data(lngid, sub_db_id, down_date_str, fulltext, pub_year)
    save_data.append({'table': 'policy_fulltext_latest', 'data': full_text_data})
    result.save_data = save_data

    # Record attachment info (if any) back onto the source row.
    file_info = get_file_info(data, res, f'({fulltext_xpath})')
    di_model_bef = DealUpdateModel()
    if file_info:
        di_model_bef.update.update({"other_dicts": json.dumps(file_info, ensure_ascii=False)})
    else:
        di_model_bef.update.update({"other_dicts": "{}"})
    di_model_bef.where.update({"rawid": callmodel.sql_model.rawid, "task_tag": callmodel.sql_model.task_tag,
                               "task_name": callmodel.sql_model.task_name})
    result.befor_dicts.update_list.append(di_model_bef)
    return result


#   陕西省汉中市
def policy_hanzhonglist_callback(callmodel: CallBackModel[PolicyListModel]) -> DealModel:
    """Parse a Hanzhong (Shaanxi) policy list page.

    On the first page, schedules list-crawl tasks for every remaining page;
    on each page, extracts article links and queues them for the article
    stage under task_tag_next.
    """
    result = DealModel()
    para_dicts = callmodel.para_dicts
    task_info = callmodel.redis_all.parse_dict["1_1"].task_info
    info_dicts = {"task_name": callmodel.sql_model.task_name,
                  "task_tag": callmodel.sql_model.task_tag,
                  "task_tag_next": task_info.task_tag_next}
    if "1_1" in para_dicts["data"]:
        # Total page count is embedded in a createPageHTML('page_div', N, ...) JS call.
        max_count = re.findall(r"createPageHTML\('page_div',(\d+)", para_dicts["data"]["1_1"]['html'])
        total_page = int(max_count[0]) if max_count else 1
        page_index = int(callmodel.sql_model.page_index)
        if page_index == 1:
            # First page: enqueue list tasks for pages 2..total_page.
            sql_dict = deal_sql_dict(callmodel.sql_model.dict())
            di_model_bef = DealInsertModel()
            di_model_bef.insert_pre = CoreSqlValue.insert_ig_it
            list_json = json.loads(callmodel.sql_model.list_json)
            for page in range(page_index + 1, total_page + 1):
                sql_dict["page"] = total_page
                sql_dict["page_index"] = page
                dic = {"page_info": f"{list_json['page_info']}_{page}"}
                sql_dict["list_json"] = json.dumps(dic, ensure_ascii=False)
                di_model_bef.lists.append(sql_dict.copy())
            result.befor_dicts.insert.append(di_model_bef)
        di_model_next = DealInsertModel()
        di_model_next.insert_pre = CoreSqlValue.insert_ig_it
        res = Selector(text=para_dicts["data"]["1_1"]['html'])
        li_list = res.xpath('//table[@class="more-list-table"]/tbody/tr|//div[@class="m-lst36"]/ul/li')
        for li in li_list:
            temp = info_dicts.copy()
            temp["task_tag"] = temp["task_tag_next"]
            del temp["task_tag_next"]
            article_json = dict()
            href = li.xpath('td[1]/a/@href|a/@href').extract_first()
            if not href:
                # Row without a link (e.g. a header row): nothing to queue.
                continue
            base_url = f'http://www.hanzhong.gov.cn'
            # Keep absolute links as-is; prefix the site root onto relative ones.
            if 'http' in href:
                url = href
            else:
                url = base_url + href
            if 'htm' not in url:
                continue
            # rawid is the file name without extension: ".../<rawid>.html".
            rawid = re.findall(r'(.*?)\.', url.split('/')[-1])[0]
            temp["rawid"] = rawid
            temp["sub_db_id"] = '99637'
            article_json["url"] = url
            article_json["title"] = li.xpath('td[1]/a/text()|a/text()').extract_first().strip()
            article_json["pub_date"] = li.xpath('td[3]/span/text()|span/text()').extract_first().strip()
            temp["article_json"] = json.dumps(article_json, ensure_ascii=False)
            di_model_next.lists.append(temp)
        result.next_dicts.insert.append(di_model_next)

    return result


def policy_hanzhongarticle_callback(callmodel: CallBackModel[PolicyArticleModel]) -> DealModel:
    """No-op article stage for Hanzhong; all parsing happens in the ETL callback."""
    return DealModel()


def policy_hanzhongarticle_etl_callback(callmodel) -> EtlDealModel:
    """ETL a Hanzhong policy article page into policy rows.

    Extracts metadata (title, document number, index number, validity,
    subject, written date, issuing organ) plus the full text, saves them to
    policy_latest / policy_fulltext_latest, and writes attachment info back
    onto the originating task row.

    Raises:
        Exception: if the full-text container cannot be located in the page.
    """
    result = EtlDealModel()
    save_data = list()

    html = callmodel.para_dicts['data']['1_1']['html']
    article_json = json.loads(callmodel.sql_model.article_json)
    provider_url = article_json['url']
    pub_date = clean_pubdate(article_json['pub_date'])
    pub_year = pub_date[:4]
    res = Selector(text=html)

    # Prefer the on-page title; fall back to the list-page title.
    title = ''.join(res.xpath('//h1[@class="m-txt-tt"]//text()').extract()).strip()
    if not title:
        title = article_json['title'].strip()
    pub_no = ''.join(res.xpath('//div[@class="file-table"]//td[contains(text(),"发文字号")]/following::td[1]/text()').extract()).strip()
    index_no = ''.join(res.xpath('//div[@class="file-table"]//td[contains(text(),"索引号")]/following::td[1]/text()').extract()).strip()
    legal_status = ''.join(res.xpath('//div[@class="file-table"]//td[contains(text(),"有效性")]/following::td[1]/text()').extract()).strip()
    subject = ''.join(res.xpath('//div[@class="file-table"]//td[contains(text(),"主题分类")]/following::td[1]/text()').extract()).strip()
    written_date = ''.join(res.xpath('//div[@class="file-table"]//td[contains(text(),"发文时间")]/following::td[1]/text()').extract()).strip()
    organ = ''.join(res.xpath('//div[@class="file-table"]//td[contains(text(),"发文机构")]/following::td[1]/text()').extract()).strip()
    if organ.startswith('市'):
        # Qualify bare "市..." organ names with the city name.
        organ = '汉中' + organ

    fulltext_xpath = '//div[@id="article"]'
    fulltext = res.xpath(fulltext_xpath).extract_first()
    if not fulltext:
        raise Exception(f'hanzhong article fulltext not found: {provider_url}')

    down_date_str = time.strftime('%Y%m%d_%H%M%S', time.localtime())
    sub_db_id = '99637'
    rawid = callmodel.sql_model.rawid
    lngid = BaseLngid().GetLngid(sub_db_id, rawid)
    product = "HANZHONG"
    zt_provider = "hanzhonggovpolicy"
    data = init_data(rawid, lngid, sub_db_id, down_date_str, product, zt_provider)
    print(lngid)

    data['title'] = title
    data['provider_url'] = provider_url
    data['pub_date'] = pub_date
    data['pub_year'] = pub_year
    data['pub_no'] = pub_no
    data['organ'] = organ
    data['index_no'] = index_no
    data['written_date'] = clean_pubdate(written_date)
    data['subject'] = subject
    data['legal_status'] = legal_status

    save_data.append({'table': 'policy_latest', 'data': data})
    full_text_data = init_full_text_data(lngid, sub_db_id, down_date_str, fulltext, pub_year)
    save_data.append({'table': 'policy_fulltext_latest', 'data': full_text_data})
    result.save_data = save_data

    # Record attachment info (files referenced from the full text) on the task row.
    file_info = get_file_info(data, res, f'({fulltext_xpath})')
    di_model_bef = DealUpdateModel()
    if file_info:
        di_model_bef.update.update({"other_dicts": json.dumps(file_info, ensure_ascii=False)})
    else:
        di_model_bef.update.update({"other_dicts": "{}"})
    di_model_bef.where.update({"rawid": callmodel.sql_model.rawid, "task_tag": callmodel.sql_model.task_tag,
                               "task_name": callmodel.sql_model.task_name})
    result.befor_dicts.update_list.append(di_model_bef)
    return result


#   陕西省安康市
def policy_ankanglist_callback(callmodel: CallBackModel[PolicyListModel]) -> DealModel:
    """Parse an Ankang (Shaanxi) policy list page.

    On the first page, schedules list-crawl tasks for every remaining page;
    on each page, extracts article links and queues them for the article
    stage under task_tag_next.
    """
    result = DealModel()
    para_dicts = callmodel.para_dicts
    task_info = callmodel.redis_all.parse_dict["1_1"].task_info
    info_dicts = {"task_name": callmodel.sql_model.task_name,
                  "task_tag": callmodel.sql_model.task_tag,
                  "task_tag_next": task_info.task_tag_next}
    if "1_1" in para_dicts["data"]:
        # Page count appears as ".../共N页<" in the pager markup.
        max_count = re.findall(r"/共(\d+)页<", para_dicts["data"]["1_1"]['html'])
        total_page = int(max_count[0]) if max_count else 1
        page_index = int(callmodel.sql_model.page_index)
        if page_index == 1:
            # First page: enqueue list tasks for pages 2..total_page.
            sql_dict = deal_sql_dict(callmodel.sql_model.dict())
            di_model_bef = DealInsertModel()
            di_model_bef.insert_pre = CoreSqlValue.insert_ig_it
            list_json = json.loads(callmodel.sql_model.list_json)
            for page in range(page_index + 1, total_page + 1):
                sql_dict["page"] = total_page
                sql_dict["page_index"] = page
                dic = {"page_info": f"{list_json['page_info']}_{page}"}
                sql_dict["list_json"] = json.dumps(dic, ensure_ascii=False)
                di_model_bef.lists.append(sql_dict.copy())
            result.befor_dicts.insert.append(di_model_bef)
        di_model_next = DealInsertModel()
        di_model_next.insert_pre = CoreSqlValue.insert_ig_it
        res = Selector(text=para_dicts["data"]["1_1"]['html'])
        li_list = res.xpath('//div[@class="putong"]/ul[@class="newlist"]/li')
        for li in li_list:
            temp = info_dicts.copy()
            temp["task_tag"] = temp["task_tag_next"]
            del temp["task_tag_next"]
            article_json = dict()
            href = li.xpath('a/@href').extract_first()
            if not href:
                continue
            base_url = f'http://www.ankang.gov.cn/{callmodel.sql_model.list_rawid}.html'
            # BUGFIX: the urljoin result was previously discarded and replaced by
            # href.replace(...), leaving relative hrefs unresolved. Resolve first,
            # then force the http scheme the site serves.
            url = parse.urljoin(base_url, href).replace('https', 'http')
            if 'htm' not in url:
                continue
            # rawid is the file name without extension: ".../<rawid>.html".
            rawid = re.findall(r'(.*?)\.', url.split('/')[-1])[0]
            temp["rawid"] = rawid
            temp["sub_db_id"] = '99638'
            article_json["url"] = url
            article_json["title"] = li.xpath('a/font/text()|a/text()').extract_first().strip()
            article_json["pub_date"] = li.xpath('span[@class="date"]/text()|span[@class="time"]/text()').extract_first().strip()
            temp["article_json"] = json.dumps(article_json, ensure_ascii=False)
            di_model_next.lists.append(temp)
        result.next_dicts.insert.append(di_model_next)

    return result


def policy_ankangarticle_callback(callmodel: CallBackModel[PolicyArticleModel]) -> DealModel:
    """No-op article stage for Ankang; all parsing happens in the ETL callback."""
    return DealModel()


def policy_ankangarticle_etl_callback(callmodel) -> EtlDealModel:
    """ETL an Ankang policy article page into policy rows.

    Extracts metadata (title, document number, index number, validity,
    written date, issuing department) plus the full text, saves them to
    policy_latest / policy_fulltext_latest, and writes attachment info back
    onto the originating task row.

    Raises:
        Exception: if the full-text container cannot be located in the page.
    """
    result = EtlDealModel()
    save_data = list()

    html = callmodel.para_dicts['data']['1_1']['html']
    article_json = json.loads(callmodel.sql_model.article_json)
    provider_url = article_json['url']
    pub_date = clean_pubdate(article_json['pub_date'])
    pub_year = pub_date[:4]
    res = Selector(text=html)

    # Prefer the on-page title; fall back to the list-page title.
    title = ''.join(res.xpath('//h2[@class="gkzn-tit"]//text()|//div[@class="detaTit"]//text()').extract()).strip()
    if not title:
        title = article_json['title'].strip()
    pub_no = ''.join(res.xpath('//div[@class="gkhead"]//td[text()="文号"]/following::td[1]/text()').extract()).strip()
    index_no = ''.join(res.xpath('//div[@class="gkhead"]//td[contains(text(),"索引号")]/following::td[1]/text()').extract()).strip()
    legal_status = ''.join(res.xpath('//div[@class="gkhead"]//td[text()="有效性"]/following::td[1]/text()').extract()).strip()
    written_date = ''.join(res.xpath('//div[@class="gkhead"]//td[contains(text(),"成文日期")]/following::td[1]/text()').extract()).strip()
    organ = ''.join(res.xpath('//div[@class="gkhead"]//td[contains(text(),"公开责任部门")]/following::td[1]/text()').extract()).strip()
    if organ.startswith('市'):
        # Qualify bare "市..." organ names with the city name.
        organ = '安康' + organ

    fulltext_xpath = '//div[@id="fontzoom"]'
    fulltext = res.xpath(fulltext_xpath).extract_first()
    if not fulltext:
        raise Exception(f'ankang article fulltext not found: {provider_url}')

    down_date_str = time.strftime('%Y%m%d_%H%M%S', time.localtime())
    sub_db_id = '99638'
    rawid = callmodel.sql_model.rawid
    lngid = BaseLngid().GetLngid(sub_db_id, rawid)
    product = "ANKANG"
    zt_provider = "ankanggovpolicy"
    data = init_data(rawid, lngid, sub_db_id, down_date_str, product, zt_provider)
    print(lngid)

    data['title'] = title
    data['provider_url'] = provider_url
    data['pub_date'] = pub_date
    data['pub_year'] = pub_year
    data['pub_no'] = pub_no
    data['organ'] = organ
    data['index_no'] = index_no
    data['written_date'] = clean_pubdate(written_date)
    data['legal_status'] = legal_status

    save_data.append({'table': 'policy_latest', 'data': data})
    full_text_data = init_full_text_data(lngid, sub_db_id, down_date_str, fulltext, pub_year)
    save_data.append({'table': 'policy_fulltext_latest', 'data': full_text_data})
    result.save_data = save_data

    # Record attachment info (files referenced from the full text) on the task row.
    file_info = get_file_info(data, res, f'({fulltext_xpath})')
    di_model_bef = DealUpdateModel()
    if file_info:
        di_model_bef.update.update({"other_dicts": json.dumps(file_info, ensure_ascii=False)})
    else:
        di_model_bef.update.update({"other_dicts": "{}"})
    di_model_bef.where.update({"rawid": callmodel.sql_model.rawid, "task_tag": callmodel.sql_model.task_tag,
                               "task_name": callmodel.sql_model.task_name})
    result.befor_dicts.update_list.append(di_model_bef)
    return result


#   陕西省商洛市
def policy_shangluolist_callback(callmodel: CallBackModel[PolicyListModel]) -> DealModel:
    """Parse a Shangluo (Shaanxi) policy list page.

    On the first page (page_index 0), schedules list-crawl tasks for every
    remaining page; on each page, extracts article links and queues them
    for the article stage under task_tag_next.
    """
    result = DealModel()
    para_dicts = callmodel.para_dicts
    task_info = callmodel.redis_all.parse_dict["1_1"].task_info
    info_dicts = {"task_name": callmodel.sql_model.task_name,
                  "task_tag": callmodel.sql_model.task_tag,
                  "task_tag_next": task_info.task_tag_next}
    if "1_1" in para_dicts["data"]:
        # Highest page number is taken from the "next page" pager link.
        max_count = re.findall(r'p_next p_fun".*?/(\d+)', para_dicts["data"]["1_1"]['html'])
        max_count = int(max_count[0]) if max_count else 1
        total_page = max_count + 1
        page_index = int(callmodel.sql_model.page_index)
        if page_index == 0:
            sql_dict = deal_sql_dict(callmodel.sql_model.dict())
            di_model_bef = DealInsertModel()
            di_model_bef.insert_pre = CoreSqlValue.insert_ig_it
            for page in range(page_index + 1, total_page):
                sql_dict["page"] = total_page
                sql_dict["page_index"] = page
                # This site numbers pages in reverse: page 1 -> /<total-1>.htm.
                dic = {"page_info": f"/{total_page - page}.htm"}
                sql_dict["list_json"] = json.dumps(dic, ensure_ascii=False)
                di_model_bef.lists.append(sql_dict.copy())
            result.befor_dicts.insert.append(di_model_bef)
        di_model_next = DealInsertModel()
        di_model_next.insert_pre = CoreSqlValue.insert_ig_it
        res = Selector(text=para_dicts["data"]["1_1"]['html'])
        li_list = res.xpath('//ul[@class="xxgklist"]//table/tbody/tr|//ul[@class="lzyjList"]/li')
        for li in li_list:
            temp = info_dicts.copy()
            temp["task_tag"] = temp["task_tag_next"]
            del temp["task_tag_next"]
            article_json = dict()
            # BUGFIX: was 'ts[2]/a/@href' -- a typo that never matched table rows
            # (the sibling title/date selectors below use 'td[...]').
            href = li.xpath('td[2]/a/@href|a/@href').extract_first()
            if not href:
                continue
            # The first page lives at <list_rawid>.htm; later pages one level deeper.
            if page_index == 0:
                base_url = f'http://www.shangluo.gov.cn/{callmodel.sql_model.list_rawid}.htm'
            else:
                base_url = f'http://www.shangluo.gov.cn/{callmodel.sql_model.list_rawid}/1.htm'
            url = parse.urljoin(base_url, href)
            if 'htm' not in url:
                continue
            # rawid is the file name without extension: ".../<rawid>.htm".
            rawid = re.findall(r'(.*?)\.', url.split('/')[-1])[0]
            temp["rawid"] = rawid
            temp["sub_db_id"] = '99639'
            article_json["url"] = url
            article_json["title"] = li.xpath('td[2]/a/text()|a/text()').extract_first().strip()
            article_json["pub_date"] = li.xpath('td[4]/text()|span/text()').extract_first().strip()
            temp["article_json"] = json.dumps(article_json, ensure_ascii=False)
            di_model_next.lists.append(temp)
        result.next_dicts.insert.append(di_model_next)

    return result


def policy_shangluoarticle_callback(callmodel: CallBackModel[PolicyArticleModel]) -> DealModel:
    """No-op article stage for Shangluo; all parsing happens in the ETL callback."""
    return DealModel()


def policy_shangluoarticle_etl_callback(callmodel) -> EtlDealModel:
    """ETL a Shangluo policy article page into policy rows.

    Extracts metadata (title, document number, index number, validity,
    issuing organ) plus the full text, saves them to policy_latest /
    policy_fulltext_latest, and writes attachment info back onto the
    originating task row.

    Raises:
        Exception: if the full-text container cannot be located in the page.
    """
    result = EtlDealModel()
    save_data = list()

    html = callmodel.para_dicts['data']['1_1']['html']
    article_json = json.loads(callmodel.sql_model.article_json)
    provider_url = article_json['url']
    pub_date = clean_pubdate(article_json['pub_date'])
    pub_year = pub_date[:4]
    res = Selector(text=html)

    # Prefer the on-page title; fall back to the list-page title.
    title = ''.join(res.xpath('//div[@class="title"]//text()').extract()).strip()
    if not title:
        title = article_json['title'].strip()
    pub_no = ''.join(res.xpath('//table[@class="xxgkcont"]//td[contains(text(),"文　　号")]/following::td[1]/text()').extract()).strip()
    index_no = ''.join(res.xpath('//table[@class="xxgkcont"]//td[contains(text(),"索 引 号")]/following::td[1]/text()').extract()).strip()
    legal_status = ''.join(res.xpath('//table[@class="xxgkcont"]//td[contains(text(),"效力状态")]/following::td[1]/text()').extract()).strip()
    organ = ''.join(res.xpath('//table[@class="xxgkcont"]//td[contains(text(),"发布机构")]/following::td[1]/text()').extract()).strip()
    if organ.startswith('市'):
        # Qualify bare "市..." organ names with the city name.
        organ = '商洛' + organ

    fulltext_xpath = '//div[contains(@id,"vsb_content")]'
    fulltext = res.xpath(fulltext_xpath).extract_first()
    if not fulltext:
        raise Exception(f'shangluo article fulltext not found: {provider_url}')

    down_date_str = time.strftime('%Y%m%d_%H%M%S', time.localtime())
    sub_db_id = '99639'
    rawid = callmodel.sql_model.rawid
    lngid = BaseLngid().GetLngid(sub_db_id, rawid)
    product = "SHANGLUO"
    zt_provider = "shangluogovpolicy"
    data = init_data(rawid, lngid, sub_db_id, down_date_str, product, zt_provider)
    print(lngid)

    data['title'] = title
    data['provider_url'] = provider_url
    data['pub_date'] = pub_date
    data['pub_year'] = pub_year
    data['pub_no'] = pub_no
    data['organ'] = organ
    data['index_no'] = index_no
    data['legal_status'] = legal_status

    save_data.append({'table': 'policy_latest', 'data': data})
    full_text_data = init_full_text_data(lngid, sub_db_id, down_date_str, fulltext, pub_year)
    save_data.append({'table': 'policy_fulltext_latest', 'data': full_text_data})
    result.save_data = save_data

    # Record attachment info (files referenced from the full text) on the task row.
    file_info = get_file_info(data, res, f'({fulltext_xpath})')
    di_model_bef = DealUpdateModel()
    if file_info:
        di_model_bef.update.update({"other_dicts": json.dumps(file_info, ensure_ascii=False)})
    else:
        di_model_bef.update.update({"other_dicts": "{}"})
    di_model_bef.where.update({"rawid": callmodel.sql_model.rawid, "task_tag": callmodel.sql_model.task_tag,
                               "task_name": callmodel.sql_model.task_name})
    result.befor_dicts.update_list.append(di_model_bef)
    return result


#   青海省发展和改革委员会
def policy_fgwqinghailist_callback(callmodel: CallBackModel[PolicyListModel]) -> DealModel:
    """Parse a Qinghai Development and Reform Commission policy list page.

    On the first page (page_index 0), schedules list-crawl tasks for every
    remaining page; on each page, extracts article links and queues them
    for the article stage under task_tag_next.
    """
    result = DealModel()
    para_dicts = callmodel.para_dicts
    task_info = callmodel.redis_all.parse_dict["1_1"].task_info
    info_dicts = {"task_name": callmodel.sql_model.task_name,
                  "task_tag": callmodel.sql_model.task_tag,
                  "task_tag_next": task_info.task_tag_next}
    if "1_1" in para_dicts["data"]:
        # Total page count is embedded in a createPageHTML(N, ...) JS call.
        max_count = re.findall(r'createPageHTML\((\d+)', para_dicts["data"]["1_1"]['html'])
        total_page = int(max_count[0]) if max_count else 1
        page_index = int(callmodel.sql_model.page_index)
        if page_index == 0:
            sql_dict = deal_sql_dict(callmodel.sql_model.dict())
            di_model_bef = DealInsertModel()
            di_model_bef.insert_pre = CoreSqlValue.insert_ig_it
            list_json = json.loads(callmodel.sql_model.list_json)
            for page in range(page_index + 1, total_page):
                sql_dict["page"] = total_page
                sql_dict["page_index"] = page
                dic = {"page_info": f"{list_json['page_info']}_{page}"}
                sql_dict["list_json"] = json.dumps(dic, ensure_ascii=False)
                di_model_bef.lists.append(sql_dict.copy())
            result.befor_dicts.insert.append(di_model_bef)
        di_model_next = DealInsertModel()
        di_model_next.insert_pre = CoreSqlValue.insert_ig_it
        res = Selector(text=para_dicts["data"]["1_1"]['html'])
        li_list = res.xpath('//div[@class="fgw-file-box"]/ul/li|//div[@class="con"]/p|//div[@class="zfxxgk_zdgkc"]/ul/li')
        for li in li_list:
            temp = info_dicts.copy()
            temp["task_tag"] = temp["task_tag_next"]
            del temp["task_tag_next"]
            article_json = dict()
            href = li.xpath('p[@class="tt"]/a/@href|a/@href').extract_first()
            if not href:
                # Entry without a link: nothing to queue.
                continue
            base_url = f'http://fgw.qinghai.gov.cn/{callmodel.sql_model.list_rawid}/index.html'
            url = parse.urljoin(base_url, href)
            if 'htm' not in url:
                continue
            # rawid is the file name without extension: ".../<rawid>.html".
            rawid = re.findall(r'(.*?)\.', url.split('/')[-1])[0]
            temp["rawid"] = rawid
            temp["sub_db_id"] = '99640'
            article_json["url"] = url
            article_json["title"] = li.xpath('p[@class="tt"]/a/text()|a/text()').extract_first().strip()
            article_json["pub_date"] = li.xpath('span[@class="date rt"]/text()|span[@class="d"]/text()|b/text()').extract_first().strip()
            temp["article_json"] = json.dumps(article_json, ensure_ascii=False)
            di_model_next.lists.append(temp)
        result.next_dicts.insert.append(di_model_next)

    return result


def policy_fgwqinghaiarticle_callback(callmodel: CallBackModel[PolicyArticleModel]) -> DealModel:
    """No-op article stage for Qinghai DRC; all parsing happens in the ETL callback."""
    return DealModel()


def policy_fgwqinghaiarticle_etl_callback(callmodel) -> EtlDealModel:
    """ETL a Qinghai DRC policy article page into policy rows.

    Extracts metadata (title, document number, index number, validity,
    subject, written date, issuing organ) plus the full text, saves them to
    policy_latest / policy_fulltext_latest, and writes attachment info back
    onto the originating task row.

    Raises:
        Exception: if the full-text container cannot be located in the page.
    """
    result = EtlDealModel()
    save_data = list()

    html = callmodel.para_dicts['data']['1_1']['html']
    article_json = json.loads(callmodel.sql_model.article_json)
    provider_url = article_json['url']
    pub_date = clean_pubdate(article_json['pub_date'])
    pub_year = pub_date[:4]
    res = Selector(text=html)

    # Prefer the on-page title; fall back to the list-page title.
    title = ''.join(res.xpath('//h1[@class="xltit"]//text()').extract()).strip()
    if not title:
        title = article_json['title'].strip()
    pub_no = ''.join(res.xpath('//div[@id="printStart1"]//span[contains(text(),"发文字号")]/following::span[1]/text()').extract()).strip()
    index_no = ''.join(res.xpath('//div[@id="printStart1"]//span[contains(text(),"索引号")]/following::span[1]/text()').extract()).strip()
    # NOTE(review): unlike the other fields, this selector targets table.xxgkcont
    # rather than div#printStart1 -- confirm it matches this site's markup.
    legal_status = ''.join(res.xpath('//table[@class="xxgkcont"]//td[contains(text(),"是否有效")]/following::td[1]/text()').extract()).strip()
    # The page answers "是/否"; normalize to the "有效" vocabulary used elsewhere.
    legal_status = "有效" if legal_status == '是' else ''
    subject = ''.join(res.xpath('//div[@id="printStart1"]//span[contains(text(),"主题分类")]/following::span[1]/text()').extract()).strip()
    written_date = ''.join(res.xpath('//div[@id="printStart1"]//span[contains(text(),"成文日期")]/following::span[1]/text()').extract()).strip()
    organ = ''.join(res.xpath('//div[@id="printStart1"]//span[contains(text(),"发布机构")]/following::span[1]/text()').extract()).strip()
    if organ.startswith('省'):
        # Qualify bare "省..." organ names with the province name.
        organ = '青海' + organ

    fulltext_xpath = '//div[contains(@class,"fgw-art-tt")]'
    fulltext = res.xpath(fulltext_xpath).extract_first()
    if not fulltext:
        raise Exception(f'fgwqinghai article fulltext not found: {provider_url}')

    down_date_str = time.strftime('%Y%m%d_%H%M%S', time.localtime())
    sub_db_id = '99640'
    rawid = callmodel.sql_model.rawid
    lngid = BaseLngid().GetLngid(sub_db_id, rawid)
    product = "FGWQINGHAI"
    zt_provider = "fgwqinghaigovpolicy"
    data = init_data(rawid, lngid, sub_db_id, down_date_str, product, zt_provider)
    print(lngid)

    data['title'] = title
    data['provider_url'] = provider_url
    data['pub_date'] = pub_date
    data['pub_year'] = pub_year
    data['pub_no'] = pub_no
    data['organ'] = organ
    data['index_no'] = index_no
    data['written_date'] = clean_pubdate(written_date)
    data['subject'] = subject
    data['legal_status'] = legal_status

    save_data.append({'table': 'policy_latest', 'data': data})
    full_text_data = init_full_text_data(lngid, sub_db_id, down_date_str, fulltext, pub_year)
    save_data.append({'table': 'policy_fulltext_latest', 'data': full_text_data})
    result.save_data = save_data

    # Record attachment info (files referenced from the full text) on the task row.
    file_info = get_file_info(data, res, f'({fulltext_xpath})')
    di_model_bef = DealUpdateModel()
    if file_info:
        di_model_bef.update.update({"other_dicts": json.dumps(file_info, ensure_ascii=False)})
    else:
        di_model_bef.update.update({"other_dicts": "{}"})
    di_model_bef.where.update({"rawid": callmodel.sql_model.rawid, "task_tag": callmodel.sql_model.task_tag,
                               "task_name": callmodel.sql_model.task_name})
    result.befor_dicts.update_list.append(di_model_bef)
    return result


#   青海省工业和信息化厅
def policy_gxgzqinghailist_callback(callmodel: CallBackModel[PolicyListModel]) -> DealModel:
    """Parse a Qinghai MIIT (工业和信息化厅) policy list page.

    The site has two list layouts, selected by whether the list URL contains
    'webaspx'. On the first page, schedules list-crawl tasks for every
    remaining page; on each page, extracts article links and queues them
    for the article stage under task_tag_next.
    """
    result = DealModel()
    para_dicts = callmodel.para_dicts
    task_info = callmodel.redis_all.parse_dict["1_1"].task_info
    info_dicts = {"task_name": callmodel.sql_model.task_name,
                  "task_tag": callmodel.sql_model.task_tag,
                  "task_tag_next": task_info.task_tag_next}
    if "1_1" in para_dicts["data"]:
        # Page count appears as ">共N页" in the pager markup.
        max_count = re.findall(r'>共(\d+)页', para_dicts["data"]["1_1"]['html'])
        total_page = int(max_count[0]) if max_count else 1
        page_index = int(callmodel.sql_model.page_index)
        if page_index == 1:
            sql_dict = deal_sql_dict(callmodel.sql_model.dict())
            di_model_bef = DealInsertModel()
            di_model_bef.insert_pre = CoreSqlValue.insert_ig_it
            for page in range(page_index + 1, total_page + 1):
                sql_dict["page"] = total_page
                sql_dict["page_index"] = page
                # This site pages via the request itself, so list_json is reused as-is.
                sql_dict["list_json"] = callmodel.sql_model.list_json
                di_model_bef.lists.append(sql_dict.copy())
            result.befor_dicts.insert.append(di_model_bef)
        di_model_next = DealInsertModel()
        di_model_next.insert_pre = CoreSqlValue.insert_ig_it
        res = Selector(text=para_dicts["data"]["1_1"]['html'])
        if 'webaspx' in callmodel.sql_model.list_rawid:
            # Legacy layout: plain table with title link and date columns.
            li_list = res.xpath('//td[@class="yhhei15"]//table/tr')
            for li in li_list:
                temp = info_dicts.copy()
                temp["task_tag"] = temp["task_tag_next"]
                del temp["task_tag_next"]
                article_json = dict()
                href = li.xpath('td[1]/a/@href').extract_first()
                if not href:
                    continue
                base_url = f'http://gxgz.qinghai.gov.cn/{callmodel.sql_model.list_rawid}'
                url = parse.urljoin(base_url, href)
                if 'htm' not in url:
                    continue
                rawid = re.findall(r'(.*?)\.', url.split('/')[-1])[0]
                temp["rawid"] = rawid
                temp["sub_db_id"] = '99641'
                article_json["url"] = url
                article_json["title"] = li.xpath('td[1]/a/text()').extract_first().strip()
                article_json["pub_date"] = li.xpath('td[2]/text()').extract_first().strip()
                temp["article_json"] = json.dumps(article_json, ensure_ascii=False)
                di_model_next.lists.append(temp)
        else:
            # Newer layout: table with pub_no / written_date columns; skip header row.
            li_list = res.xpath('//table[@class="lxzdls"]/tr')[1:]
            for li in li_list:
                temp = info_dicts.copy()
                temp["task_tag"] = temp["task_tag_next"]
                del temp["task_tag_next"]
                article_json = dict()
                href = li.xpath('td[2]/a/@href').extract_first()
                if not href:
                    # Guard against rows without a link (matches the other branch).
                    continue
                base_url = f'http://gxgz.qinghai.gov.cn/{callmodel.sql_model.list_rawid}'
                url = parse.urljoin(base_url, href)
                if 'htm' not in url:
                    continue
                rawid = re.findall(r'(.*?)\.', url.split('/')[-1])[0]
                temp["rawid"] = rawid
                temp["sub_db_id"] = '99641'
                article_json["url"] = url
                article_json["title"] = li.xpath('td[2]/a/text()').extract_first().strip()
                article_json["pub_date"] = li.xpath('td[5]/text()').extract_first().strip()
                article_json["pub_no"] = ''.join(li.xpath('td[3]/text()').extract()).strip()
                article_json["written_date"] = ''.join(li.xpath('td[4]/text()').extract()).strip()
                temp["article_json"] = json.dumps(article_json, ensure_ascii=False)
                di_model_next.lists.append(temp)
        result.next_dicts.insert.append(di_model_next)

    return result


def policy_gxgzqinghaiarticle_callback(callmodel: CallBackModel[PolicyArticleModel]) -> DealModel:
    """No-op article stage for Qinghai MIIT; all parsing happens in the ETL callback."""
    return DealModel()


def policy_gxgzqinghaiarticle_etl_callback(callmodel) -> EtlDealModel:
    """ETL a Qinghai MIIT policy article page into policy rows.

    pub_no and written_date come from the list page (article_json), since
    the article page carries no metadata table. Saves the record to
    policy_latest / policy_fulltext_latest and writes attachment info back
    onto the originating task row.

    Raises:
        Exception: if the full-text container cannot be located in the page.
    """
    result = EtlDealModel()
    save_data = list()

    html = callmodel.para_dicts['data']['1_1']['html']
    article_json = json.loads(callmodel.sql_model.article_json)
    provider_url = article_json['url']
    pub_date = clean_pubdate(article_json['pub_date'])
    pub_year = pub_date[:4]
    res = Selector(text=html)

    title = ''.join(res.xpath('//td[@class="zdgk_xlbt"]//text()').extract()).strip()
    if '\n' in title:
        # Multi-line page titles are unreliable; use the <meta> title instead.
        title = ''.join(res.xpath('//meta[@name="ArticleTitle"][1]/@content').extract()).strip()
        title = title.replace('<br>', '').replace('  ', '')
    if not title:
        # Final fallback: the title captured on the list page.
        title = article_json['title'].strip()
    pub_no = article_json.get('pub_no', '')
    written_date = article_json.get('written_date', '')

    fulltext_xpath = '//p/parent::*[@class="yhhei15"]|//font[@id="Zoom"]|//div/parent::*[@class="yhhei15"]'
    fulltext = res.xpath(fulltext_xpath).extract_first()
    if not fulltext:
        raise Exception(f'gxgzqinghai article fulltext not found: {provider_url}')

    down_date_str = time.strftime('%Y%m%d_%H%M%S', time.localtime())
    sub_db_id = '99641'
    rawid = callmodel.sql_model.rawid
    lngid = BaseLngid().GetLngid(sub_db_id, rawid)
    product = "GXGZQINGHAI"
    zt_provider = "gxgzqinghaigovpolicy"
    data = init_data(rawid, lngid, sub_db_id, down_date_str, product, zt_provider)
    print(lngid)

    data['title'] = title
    data['provider_url'] = provider_url
    data['pub_date'] = pub_date
    data['pub_year'] = pub_year
    data['pub_no'] = pub_no
    data['written_date'] = clean_pubdate(written_date)

    save_data.append({'table': 'policy_latest', 'data': data})
    full_text_data = init_full_text_data(lngid, sub_db_id, down_date_str, fulltext, pub_year)
    save_data.append({'table': 'policy_fulltext_latest', 'data': full_text_data})
    result.save_data = save_data

    # Record attachment info (files referenced from the full text) on the task row.
    file_info = get_file_info(data, res, f'({fulltext_xpath})')
    di_model_bef = DealUpdateModel()
    if file_info:
        di_model_bef.update.update({"other_dicts": json.dumps(file_info, ensure_ascii=False)})
    else:
        di_model_bef.update.update({"other_dicts": "{}"})
    di_model_bef.where.update({"rawid": callmodel.sql_model.rawid, "task_tag": callmodel.sql_model.task_tag,
                               "task_name": callmodel.sql_model.task_name})
    result.befor_dicts.update_list.append(di_model_bef)
    return result


#   青海省科学技术厅
def policy_kjtqinghailist_callback(callmodel: CallBackModel[PolicyListModel]) -> DealModel:
    """List-page callback for the Qinghai Department of Science and
    Technology (kjt.qinghai.gov.cn).

    On the first page (0-based ``page_index == 0``) the total page count is
    derived from the "共N条" record counter and the remaining list pages are
    enqueued; every entry on the current page is then queued as an article
    task for the next stage.
    """
    result = DealModel()
    para_dicts = callmodel.para_dicts
    task_info = callmodel.redis_all.parse_dict["1_1"].task_info
    info_dicts = {"task_name": callmodel.sql_model.task_name,
                  "task_tag": callmodel.sql_model.task_tag,
                  "task_tag_next": task_info.task_tag_next}
    if "1_1" in para_dicts["data"]:
        # Total record count; raw string avoids the invalid-escape warning.
        max_count = re.findall(r'>共(\d+)条<', para_dicts["data"]["1_1"]['html'])
        max_count = int(max_count[0]) if max_count else 1
        # The "cate_id/00" channel shows 15 entries per page, the others 20.
        if 'cate_id/00' in callmodel.sql_model.list_rawid:
            num = 15
        else:
            num = 20
        total_page = math.ceil(max_count / num)
        page_index = int(callmodel.sql_model.page_index)
        if page_index == 0:
            sql_dict = callmodel.sql_model.dict()
            di_model_bef = DealInsertModel()
            di_model_bef.insert_pre = CoreSqlValue.insert_ig_it
            sql_dict = deal_sql_dict(sql_dict)
            # Pages are 0-based here, so enqueue pages 1 .. total_page - 1.
            for page in range(page_index + 1, total_page):
                sql_dict["page"] = total_page
                sql_dict["page_index"] = page
                # page_info carries the record offset, not the page number.
                dic = {"page_info": f"{page * num}"}
                sql_dict["list_json"] = json.dumps(dic, ensure_ascii=False)
                di_model_bef.lists.append(sql_dict.copy())
            result.befor_dicts.insert.append(di_model_bef)
        di_model_next = DealInsertModel()
        di_model_next.insert_pre = CoreSqlValue.insert_ig_it
        res = Selector(text=para_dicts["data"]["1_1"]['html'])
        if 'cate_id/00' in callmodel.sql_model.list_rawid:
            # Table layout: skip the header row.
            li_list = res.xpath('//ul[@class="list_ul"]/table/tbody/tr')[1:]
            for li in li_list:
                temp = info_dicts.copy()
                temp["task_tag"] = temp["task_tag_next"]
                del temp["task_tag_next"]
                article_json = dict()
                href = li.xpath('td[2]/a/@href').extract_first()
                base_url = f'https://kjt.qinghai.gov.cn/{callmodel.sql_model.list_rawid}/0'
                url = parse.urljoin(base_url, href)
                rawid = url.split('/')[-1]
                temp["rawid"] = rawid
                temp["sub_db_id"] = '99642'
                article_json["url"] = url
                article_json["title"] = li.xpath('td[2]/a/text()').extract_first().strip()
                article_json["pub_date"] = li.xpath('td[3]/text()').extract_first().strip()
                article_json["pub_no"] = ''.join(li.xpath('td[1]/span/text()').extract()).strip()
                temp["article_json"] = json.dumps(article_json, ensure_ascii=False)
                di_model_next.lists.append(temp)
        else:
            # Plain <ul> layout: no document number column available.
            li_list = res.xpath('//ul[@class="list_ul"]/li')
            for li in li_list:
                temp = info_dicts.copy()
                temp["task_tag"] = temp["task_tag_next"]
                del temp["task_tag_next"]
                article_json = dict()
                href = li.xpath('a/@href').extract_first()
                base_url = f'https://kjt.qinghai.gov.cn/{callmodel.sql_model.list_rawid}/0'
                url = parse.urljoin(base_url, href)
                rawid = url.split('/')[-1]
                temp["rawid"] = rawid
                temp["sub_db_id"] = '99642'
                article_json["url"] = url
                article_json["title"] = li.xpath('a/text()').extract_first().strip()
                article_json["pub_date"] = li.xpath('a/span/text()').extract_first().strip()
                temp["article_json"] = json.dumps(article_json, ensure_ascii=False)
                di_model_next.lists.append(temp)
        result.next_dicts.insert.append(di_model_next)

    return result


def policy_kjtqinghaiarticle_callback(callmodel: CallBackModel[PolicyArticleModel]) -> DealModel:
    """Article-stage callback; no extra processing is required here."""
    return DealModel()


def policy_kjtqinghaiarticle_etl_callback(callmodel) -> EtlDealModel:
    """ETL callback for kjt.qinghai.gov.cn article detail pages.

    Builds a ``policy_latest`` record (title, dates, document number) and a
    ``policy_fulltext_latest`` record from the fetched page, and stores any
    attachment info back onto the source row via an update.

    Raises:
        Exception: if the full-text container cannot be located.
    """
    result = EtlDealModel()
    save_data = list()

    html = callmodel.para_dicts['data']['1_1']['html']
    article_json = json.loads(callmodel.sql_model.article_json)
    provider_url = article_json['url']
    pub_date = clean_pubdate(article_json['pub_date'])
    pub_year = pub_date[:4]
    res = Selector(text=html)

    # Prefer the on-page headline; fall back to the list-stage title.
    title = ''.join(res.xpath('//div[@class="show_title"]//h1//text()').extract()).strip()
    if not title:
        title = article_json['title'].strip()
    pub_no = article_json.get('pub_no', '')

    fulltext_xpath = '//div[@id="show_p"]'
    fulltext = res.xpath(fulltext_xpath).extract_first()
    if not fulltext:
        raise Exception('kjt.qinghai.gov.cn article: full text not found')

    down_date_str = time.strftime('%Y%m%d_%H%M%S', time.localtime())
    sub_db_id = '99642'
    rawid = callmodel.sql_model.rawid
    lngid = BaseLngid().GetLngid(sub_db_id, rawid)
    product = "KJTQINGHAI"
    zt_provider = "kjtqinghaigovpolicy"
    data = init_data(rawid, lngid, sub_db_id, down_date_str, product, zt_provider)
    print(lngid)

    data['title'] = title
    data['provider_url'] = provider_url
    data['pub_date'] = pub_date
    data['pub_year'] = pub_year
    data['pub_no'] = pub_no

    save_data.append({'table': 'policy_latest', 'data': data})
    full_text_data = init_full_text_data(lngid, sub_db_id, down_date_str, fulltext, pub_year)
    save_data.append({'table': 'policy_fulltext_latest', 'data': full_text_data})
    result.save_data = save_data

    # Record attachment info (or an empty dict) on the originating row.
    file_info = get_file_info(data, res, f'({fulltext_xpath})')
    di_model_bef = DealUpdateModel()
    if file_info:
        di_model_bef.update.update({"other_dicts": json.dumps(file_info, ensure_ascii=False)})
    else:
        di_model_bef.update.update({"other_dicts": "{}"})
    di_model_bef.where.update({"rawid": callmodel.sql_model.rawid, "task_tag": callmodel.sql_model.task_tag,
                               "task_name": callmodel.sql_model.task_name})
    result.befor_dicts.update_list.append(di_model_bef)
    return result


#   青海省教育厅
def policy_jytqinghailist_callback(callmodel: CallBackModel[PolicyListModel]) -> DealModel:
    """List-page callback for the Qinghai Department of Education
    (jyt.qinghai.gov.cn).

    The endpoint returns records as a custom-delimited string: records are
    separated by 'rrooww' and columns within a record by 'ccooll'; the first
    column is the article id used to build the detail URL.
    """
    result = DealModel()
    para_dicts = callmodel.para_dicts
    task_info = callmodel.redis_all.parse_dict["1_1"].task_info
    info_dicts = {"task_name": callmodel.sql_model.task_name,
                  "task_tag": callmodel.sql_model.task_tag,
                  "task_tag_next": task_info.task_tag_next}
    if "1_1" in para_dicts["data"]:
        di_model_next = DealInsertModel()
        di_model_next.insert_pre = CoreSqlValue.insert_ig_it
        li_list = para_dicts["data"]["1_1"]['html'].split('rrooww')
        for li in li_list:
            temp = info_dicts.copy()
            temp["task_tag"] = temp["task_tag_next"]
            del temp["task_tag_next"]
            article_json = dict()
            href = li.split('ccooll')[0]
            # The channel id decides which listing the detail URL belongs to.
            if 'namelan' in callmodel.sql_model.list_rawid:
                url = f'http://jyt.qinghai.gov.cn/gk/zwgk/idoc.cshtml?namepid={href}'
            else:
                url = f'http://jyt.qinghai.gov.cn/gk/tzgg/idoc.cshtml?namepid={href}'
            rawid = href
            temp["rawid"] = rawid
            temp["sub_db_id"] = '99643'
            # Title/date are not available at list stage; the ETL stage
            # extracts them from the detail page instead.
            article_json["url"] = url
            temp["article_json"] = json.dumps(article_json, ensure_ascii=False)
            di_model_next.lists.append(temp)
        result.next_dicts.insert.append(di_model_next)

    return result


def policy_jytqinghaiarticle_callback(callmodel: CallBackModel[PolicyArticleModel]) -> DealModel:
    """Article-stage callback; no extra processing is required here."""
    return DealModel()


def policy_jytqinghaiarticle_etl_callback(callmodel) -> EtlDealModel:
    """ETL callback for jyt.qinghai.gov.cn article detail pages.

    Parses title, publication date (either "年/月/日" text or a plain date
    string), document metadata and full text, then emits rows for the
    ``policy_latest`` / ``policy_fulltext_latest`` tables and an
    attachment-info update on the source row.

    Raises:
        Exception: when title/date or the full-text container is missing.
    """
    result = EtlDealModel()
    save_data = list()

    html = callmodel.para_dicts['data']['1_1']['html']
    article_json = json.loads(callmodel.sql_model.article_json)
    provider_url = article_json['url']
    res = Selector(text=html)

    title = ''.join(res.xpath('//p[@id="ititle"]//text()|//p[@id="ibiaoti"]//text()').extract()).strip()
    pub_date_info = ''.join(res.xpath('//p[@id="itime"]//text()|//div[@id="r21"]//strong[contains(text(),"发文日期")]/parent::li[1]/text()').extract()).strip()
    if '年' in pub_date_info:
        # Chinese-style date, e.g. "2021年3月5日" -> "20210305";
        # missing parts degrade to zero padding.
        year = re.findall(r'(\d+)年', pub_date_info)
        year = year[0].rjust(4, '0') if year else '0000'
        month = re.findall(r'(\d+)月', pub_date_info)
        month = month[0].rjust(2, '0') if month else '00'
        day = re.findall(r'(\d+)日', pub_date_info)
        day = day[0].rjust(2, '0') if day else '00'
        pub_date = year + month + day
    else:
        pub_date = clean_pubdate(pub_date_info)
    pub_year = pub_date[:4]
    if not title or not pub_date:
        raise Exception('jyt.qinghai.gov.cn article: missing title or pub_date')

    # Metadata items look like "<strong>label</strong>：value"; strip the
    # fullwidth colon left over after taking the trailing text node.
    pub_no = ''.join(res.xpath('//div[@id="r21"]//strong[contains(text(),"文号")]/parent::li[1]/text()').extract()).strip()
    pub_no = pub_no.replace('：', '').strip()
    index_no = ''.join(res.xpath('//div[@id="r21"]//strong[contains(text(),"索引号")]/parent::li[1]/text()').extract()).strip()
    index_no = index_no.replace('：', '').strip()
    subject = ''.join(res.xpath('//div[@id="r21"]//strong[contains(text(),"主题分类")]/parent::li[1]/text()').extract()).strip()
    subject = subject.replace('：', '').strip()
    subject_word = ''.join(res.xpath('//div[@id="r21"]//strong[contains(text(),"主题词")]/parent::li[1]/text()').extract()).strip()
    subject_word = subject_word.replace('：', '').strip()
    organ = ''.join(res.xpath('//div[@id="r21"]//strong[contains(text(),"发文主体")]/parent::li[1]/text()').extract()).strip()
    organ = organ.replace('：', '').strip()
    # Qualify bare provincial department names with the province.
    if organ.startswith('省'):
        organ = '青海' + organ

    fulltext_xpath = '//div[@id="r22"]|//p[@id="icnt"]'
    fulltext = res.xpath(fulltext_xpath).extract_first()
    if not fulltext:
        raise Exception('jyt.qinghai.gov.cn article: full text not found')

    down_date_str = time.strftime('%Y%m%d_%H%M%S', time.localtime())
    sub_db_id = '99643'
    rawid = callmodel.sql_model.rawid
    lngid = BaseLngid().GetLngid(sub_db_id, rawid)
    product = "JYTQINGHAI"
    zt_provider = "jytqinghaigovpolicy"
    data = init_data(rawid, lngid, sub_db_id, down_date_str, product, zt_provider)
    print(lngid)

    data['title'] = title
    data['provider_url'] = provider_url
    data['pub_date'] = pub_date
    data['pub_year'] = pub_year
    data['pub_no'] = pub_no
    data['organ'] = organ
    data['index_no'] = index_no
    data['subject'] = subject
    data['subject_word'] = subject_word

    save_data.append({'table': 'policy_latest', 'data': data})
    full_text_data = init_full_text_data(lngid, sub_db_id, down_date_str, fulltext, pub_year)
    save_data.append({'table': 'policy_fulltext_latest', 'data': full_text_data})
    result.save_data = save_data

    # Record attachment info (or an empty dict) on the originating row.
    file_info = get_file_info(data, res, f'({fulltext_xpath})')
    di_model_bef = DealUpdateModel()
    if file_info:
        di_model_bef.update.update({"other_dicts": json.dumps(file_info, ensure_ascii=False)})
    else:
        di_model_bef.update.update({"other_dicts": "{}"})
    di_model_bef.where.update({"rawid": callmodel.sql_model.rawid, "task_tag": callmodel.sql_model.task_tag,
                               "task_name": callmodel.sql_model.task_name})
    result.befor_dicts.update_list.append(di_model_bef)
    return result


#   青海省民政厅
def policy_mztqinghailist_callback(callmodel: CallBackModel[PolicyListModel]) -> DealModel:
    """List-page callback for the Qinghai Department of Civil Affairs
    (mzt.qinghai.gov.cn).

    On the first page (1-based ``page_index == 1``) the total page count is
    read from the "N条/M页" pager and the remaining pages are enqueued; each
    entry on the current page becomes an article task.
    """
    result = DealModel()
    para_dicts = callmodel.para_dicts
    task_info = callmodel.redis_all.parse_dict["1_1"].task_info
    info_dicts = {"task_name": callmodel.sql_model.task_name,
                  "task_tag": callmodel.sql_model.task_tag,
                  "task_tag_next": task_info.task_tag_next}
    if "1_1" in para_dicts["data"]:
        # The pager already exposes the page count directly.
        max_count = re.findall(r'条/(\d+)页', para_dicts["data"]["1_1"]['html'])
        max_count = int(max_count[0]) if max_count else 1
        total_page = max_count
        page_index = int(callmodel.sql_model.page_index)
        if page_index == 1:
            sql_dict = callmodel.sql_model.dict()
            di_model_bef = DealInsertModel()
            di_model_bef.insert_pre = CoreSqlValue.insert_ig_it
            sql_dict = deal_sql_dict(sql_dict)
            for page in range(page_index + 1, total_page + 1):
                sql_dict["page"] = total_page
                sql_dict["page_index"] = page
                # The request template is page-agnostic; reuse it as-is.
                sql_dict["list_json"] = callmodel.sql_model.list_json
                di_model_bef.lists.append(sql_dict.copy())
            result.befor_dicts.insert.append(di_model_bef)
        di_model_next = DealInsertModel()
        di_model_next.insert_pre = CoreSqlValue.insert_ig_it
        res = Selector(text=para_dicts["data"]["1_1"]['html'])
        # Two list layouts are supported: "ul3" cards and "listBox2" rows.
        li_list = res.xpath('//ul[@class="ul3"]/li|//dd[@class="listBox2"]/ul/li')
        for li in li_list:
            temp = info_dicts.copy()
            temp["task_tag"] = temp["task_tag_next"]
            del temp["task_tag_next"]
            article_json = dict()
            href = li.xpath('div[@class="b"]/h4/a/@href|a/@href').extract_first()
            base_url = f'https://mzt.qinghai.gov.cn'
            if 'http' in href:
                url = href
            else:
                url = base_url + href
            # Only .htm/.html detail pages are crawlable; skip anything else.
            if 'htm' not in url:
                continue
            # rawid is the file name without its extension.
            rawid = re.findall(r'(.*?)\.', url.split('/')[-1])[0]
            temp["rawid"] = rawid
            temp["sub_db_id"] = '99644'
            article_json["url"] = url
            article_json["title"] = li.xpath('div[@class="b"]/h4/a/text()|a/text()').extract_first().strip()
            article_json["pub_date"] = li.xpath('div[@class="b"]/div[@class="addi"]/text()|span/text()').extract_first().strip()
            temp["article_json"] = json.dumps(article_json, ensure_ascii=False)
            di_model_next.lists.append(temp)
        result.next_dicts.insert.append(di_model_next)

    return result


def policy_mztqinghaiarticle_callback(callmodel: CallBackModel[PolicyArticleModel]) -> DealModel:
    """Article-stage callback; no extra processing is required here."""
    return DealModel()


def policy_mztqinghaiarticle_etl_callback(callmodel) -> EtlDealModel:
    """ETL callback for mzt.qinghai.gov.cn article detail pages.

    Extracts title, document metadata from the "xlwz" info box and full
    text, then emits rows for the ``policy_latest`` /
    ``policy_fulltext_latest`` tables plus an attachment-info update on the
    source row.

    Raises:
        Exception: if the full-text container cannot be located.
    """
    result = EtlDealModel()
    save_data = list()

    html = callmodel.para_dicts['data']['1_1']['html']
    article_json = json.loads(callmodel.sql_model.article_json)
    provider_url = article_json['url']
    pub_date = clean_pubdate(article_json['pub_date'])
    pub_year = pub_date[:4]
    res = Selector(text=html)

    # Prefer the on-page headline; fall back to the list-stage title.
    title = ''.join(res.xpath('//h3[@class="tm"]//text()|//div[@class="a"]//h2//text()').extract()).strip()
    if not title:
        title = article_json['title'].strip()
    pub_no = ''.join(res.xpath('//div[@class="xlwz"]//span[contains(text(),"发文字号")]/following::span[1]/text()').extract()).strip()
    index_no = ''.join(res.xpath('//div[@class="xlwz"]//span[contains(text(),"索")]/following::span[1]/text()').extract()).strip()
    legal_status = ''.join(res.xpath('//div[@class="xlwz"]//span[contains(text(),"公文时效")]/following::span[1]/text()').extract()).strip()
    subject = ''.join(res.xpath('//div[@class="xlwz"]//span[contains(text(),"主题分类")]/following::span[1]/text()').extract()).strip()
    organ = ''.join(res.xpath('//div[@class="xlwz"]//span[contains(text(),"发布机构")]/following::span[1]/text()').extract()).strip()
    # Qualify bare provincial department names with the province.
    if organ.startswith('省'):
        organ = '青海' + organ

    fulltext_xpath = '//div[@id="contentlf"]|//div[@id="content"]'
    fulltext = res.xpath(fulltext_xpath).extract_first()
    if not fulltext:
        raise Exception('mzt.qinghai.gov.cn article: full text not found')

    down_date_str = time.strftime('%Y%m%d_%H%M%S', time.localtime())
    sub_db_id = '99644'
    rawid = callmodel.sql_model.rawid
    lngid = BaseLngid().GetLngid(sub_db_id, rawid)
    product = "MZTQINGHAI"
    zt_provider = "mztqinghaigovpolicy"
    data = init_data(rawid, lngid, sub_db_id, down_date_str, product, zt_provider)
    print(lngid)

    data['title'] = title
    data['provider_url'] = provider_url
    data['pub_date'] = pub_date
    data['pub_year'] = pub_year
    data['pub_no'] = pub_no
    data['organ'] = organ
    data['index_no'] = index_no
    data['subject'] = subject
    data['legal_status'] = legal_status

    save_data.append({'table': 'policy_latest', 'data': data})
    full_text_data = init_full_text_data(lngid, sub_db_id, down_date_str, fulltext, pub_year)
    save_data.append({'table': 'policy_fulltext_latest', 'data': full_text_data})
    result.save_data = save_data

    # Record attachment info (or an empty dict) on the originating row.
    file_info = get_file_info(data, res, f'({fulltext_xpath})')
    di_model_bef = DealUpdateModel()
    if file_info:
        di_model_bef.update.update({"other_dicts": json.dumps(file_info, ensure_ascii=False)})
    else:
        di_model_bef.update.update({"other_dicts": "{}"})
    di_model_bef.where.update({"rawid": callmodel.sql_model.rawid, "task_tag": callmodel.sql_model.task_tag,
                               "task_name": callmodel.sql_model.task_name})
    result.befor_dicts.update_list.append(di_model_bef)
    return result


#   青海省财政厅
def policy_cztqinghailist_callback(callmodel: CallBackModel[PolicyListModel]) -> DealModel:
    """List callback for the czt.qinghai.gov.cn JSON listing API.

    The response body is JSON: paging information under ``paging`` and
    records under ``list``. On page 1 all pages are enqueued; every record
    becomes an article task.
    """
    result = DealModel()
    para_dicts = callmodel.para_dicts
    task_info = callmodel.redis_all.parse_dict["1_1"].task_info
    info_dicts = {"task_name": callmodel.sql_model.task_name,
                  "task_tag": callmodel.sql_model.task_tag,
                  "task_tag_next": task_info.task_tag_next}
    if "1_1" in para_dicts["data"]:
        html_json = json.loads(para_dicts["data"]["1_1"]['html'])
        total_page = html_json['paging']['total_pages_count']
        page_index = int(callmodel.sql_model.page_index)
        if page_index == 1:
            sql_dict = callmodel.sql_model.dict()
            di_model_bef = DealInsertModel()
            di_model_bef.insert_pre = CoreSqlValue.insert_ig_it
            sql_dict = deal_sql_dict(sql_dict)
            # NOTE(review): the range starts at 1, so page 1 re-enqueues
            # itself — presumably harmless under the insert-ignore prefix;
            # confirm against the siblings that start at page_index + 1.
            for page in range(1, total_page + 1):
                sql_dict["page"] = total_page
                sql_dict["page_index"] = page
                # The request template is page-agnostic; reuse it as-is.
                sql_dict["list_json"] = callmodel.sql_model.list_json
                di_model_bef.lists.append(sql_dict.copy())
            result.befor_dicts.insert.append(di_model_bef)
        di_model_next = DealInsertModel()
        di_model_next.insert_pre = CoreSqlValue.insert_ig_it

        li_list = html_json['list']
        for li in li_list:
            temp = info_dicts.copy()
            temp["task_tag"] = temp["task_tag_next"]
            del temp["task_tag_next"]
            article_json = dict()
            href = li['ID']
            url = f'http://czt.qinghai.gov.cn/zfxxgk/query/{href}.html'
            temp["rawid"] = href
            temp["sub_db_id"] = '99645'
            article_json["url"] = url
            article_json["title"] = li['Title']
            article_json["pub_date"] = li['ReleaseDate']
            temp["article_json"] = json.dumps(article_json, ensure_ascii=False)
            di_model_next.lists.append(temp)
        result.next_dicts.insert.append(di_model_next)

    return result


def policy_cztqinghailist1_callback(callmodel: CallBackModel[PolicyListModel]) -> DealModel:
    """List callback for the HTML listing pages of czt.qinghai.gov.cn.

    Reads the last page number from the "尾页" (last page) link, enqueues
    the remaining pages on page 1, and turns each list row — whose URL is
    embedded in an ``onclick="...open('...')"`` handler — into an article
    task.
    """
    result = DealModel()
    para_dicts = callmodel.para_dicts
    task_info = callmodel.redis_all.parse_dict["1_1"].task_info
    info_dicts = {"task_name": callmodel.sql_model.task_name,
                  "task_tag": callmodel.sql_model.task_tag,
                  "task_tag_next": task_info.task_tag_next}
    if "1_1" in para_dicts["data"]:
        max_count = re.findall(r'/(\d+).html">尾页', para_dicts["data"]["1_1"]['html'])
        max_count = int(max_count[0]) if max_count else 1
        total_page = max_count
        page_index = int(callmodel.sql_model.page_index)
        if page_index == 1:
            sql_dict = callmodel.sql_model.dict()
            di_model_bef = DealInsertModel()
            di_model_bef.insert_pre = CoreSqlValue.insert_ig_it
            sql_dict = deal_sql_dict(sql_dict)
            for page in range(page_index + 1, total_page + 1):
                sql_dict["page"] = total_page
                sql_dict["page_index"] = page
                # The request template is page-agnostic; reuse it as-is.
                sql_dict["list_json"] = callmodel.sql_model.list_json
                di_model_bef.lists.append(sql_dict.copy())
            result.befor_dicts.insert.append(di_model_bef)
        di_model_next = DealInsertModel()
        di_model_next.insert_pre = CoreSqlValue.insert_ig_it
        res = Selector(text=para_dicts["data"]["1_1"]['html'])
        li_list = res.xpath('//ul[@class="query-list"]/li')
        for li in li_list:
            temp = info_dicts.copy()
            temp["task_tag"] = temp["task_tag_next"]
            del temp["task_tag_next"]
            article_json = dict()
            # The detail URL lives inside the onclick handler, not an <a>.
            href_info = li.xpath('@onclick').extract_first()
            href = re.findall(r"open\('(.*?)'", href_info)[0]
            base_url = f'http://czt.qinghai.gov.cn/list/XWGL_ZXQYZCXXZL/1.html'
            url = parse.urljoin(base_url, href)
            # Only .htm/.html detail pages are crawlable; skip anything else.
            if 'htm' not in url:
                continue
            # rawid is the file name without its extension.
            rawid = re.findall(r'(.*?)\.', url.split('/')[-1])[0]
            temp["rawid"] = rawid
            temp["sub_db_id"] = '99645'
            article_json["url"] = url
            article_json["title"] = li.xpath('@title').extract_first().strip()
            article_json["pub_date"] = li.xpath('div[2]/text()').extract_first().strip()
            temp["article_json"] = json.dumps(article_json, ensure_ascii=False)
            di_model_next.lists.append(temp)
        result.next_dicts.insert.append(di_model_next)

    return result


def policy_cztqinghaiarticle_callback(callmodel: CallBackModel[PolicyArticleModel]) -> DealModel:
    """Article-stage callback; no extra processing is required here."""
    return DealModel()


def policy_cztqinghaiarticle_etl_callback(callmodel) -> EtlDealModel:
    """ETL callback for czt.qinghai.gov.cn article detail pages.

    Extracts title, the "index-table" metadata and full text, then emits
    rows for the ``policy_latest`` / ``policy_fulltext_latest`` tables plus
    an attachment-info update on the source row.

    Raises:
        Exception: if the full-text container cannot be located.
    """
    result = EtlDealModel()
    save_data = list()

    html = callmodel.para_dicts['data']['1_1']['html']
    article_json = json.loads(callmodel.sql_model.article_json)
    provider_url = article_json['url']
    pub_date = clean_pubdate(article_json['pub_date'])
    pub_year = pub_date[:4]
    res = Selector(text=html)

    # Prefer the on-page headline; fall back to the list-stage title.
    title = ''.join(res.xpath('//div[@class="container"]/h1//text()|//div[@class="index-title"]//text()').extract()).strip()
    if not title:
        title = article_json['title'].strip()
    pub_no = ''.join(res.xpath('//table[@class="index-table"]//td[contains(text(),"发文字号：")]/following::td[1]/text()').extract()).strip()
    index_no = ''.join(res.xpath('//table[@class="index-table"]//td[contains(text(),"索引号：")]/following::td[1]/text()').extract()).strip()
    written_date = ''.join(res.xpath('//table[@class="index-table"]//td[contains(text(),"成文日期：")]/following::td[1]/text()').extract()).strip()
    legal_status = ''.join(res.xpath('//table[@class="index-table"]//td[contains(text(),"公文时效：")]/following::td[1]/text()').extract()).strip()
    # NOTE(review): this selector reads the "发布机构：" (issuing organ) cell
    # into `subject` — looks like a copy/paste from the organ lookup below;
    # confirm the intended field before changing it.
    subject = ''.join(res.xpath('//table[@class="index-table"]//td[contains(text(),"发布机构：")]/following::td[1]/text()').extract()).strip()
    organ = ''.join(res.xpath('//div[@class="con "]//b[contains(text(),"发布机构：")]/ancestor::td[1]/text()').extract()).strip()
    # Qualify truncated organ names with the province/department prefix.
    if organ.startswith('省'):
        organ = '青海' + organ
    if organ.startswith('厅'):
        organ = '青海省财政' + organ

    fulltext_xpath = '//div[@id="html001"]'
    fulltext = res.xpath(fulltext_xpath).extract_first()
    if not fulltext:
        raise Exception('czt.qinghai.gov.cn article: full text not found')

    down_date_str = time.strftime('%Y%m%d_%H%M%S', time.localtime())
    sub_db_id = '99645'
    rawid = callmodel.sql_model.rawid
    lngid = BaseLngid().GetLngid(sub_db_id, rawid)
    product = "CZTQINGHAI"
    zt_provider = "cztqinghaigovpolicy"
    data = init_data(rawid, lngid, sub_db_id, down_date_str, product, zt_provider)
    print(lngid)

    data['title'] = title
    data['provider_url'] = provider_url
    # pub_date was already normalized above; cleaning again is assumed
    # idempotent — kept to preserve the original behavior.
    data['pub_date'] = clean_pubdate(pub_date)
    data['pub_year'] = pub_year
    data['pub_no'] = pub_no
    data['organ'] = organ
    data['index_no'] = index_no
    data['written_date'] = clean_pubdate(written_date)
    data['subject'] = subject
    data['legal_status'] = legal_status

    save_data.append({'table': 'policy_latest', 'data': data})
    full_text_data = init_full_text_data(lngid, sub_db_id, down_date_str, fulltext, pub_year)
    save_data.append({'table': 'policy_fulltext_latest', 'data': full_text_data})
    result.save_data = save_data

    # Record attachment info (or an empty dict) on the originating row.
    file_info = get_file_info(data, res, f'({fulltext_xpath})')
    di_model_bef = DealUpdateModel()
    if file_info:
        di_model_bef.update.update({"other_dicts": json.dumps(file_info, ensure_ascii=False)})
    else:
        di_model_bef.update.update({"other_dicts": "{}"})
    di_model_bef.where.update({"rawid": callmodel.sql_model.rawid, "task_tag": callmodel.sql_model.task_tag,
                               "task_name": callmodel.sql_model.task_name})
    result.befor_dicts.update_list.append(di_model_bef)
    return result


#   青海省人力资源和社会保障厅
def policy_rstqinghailist_callback(callmodel: CallBackModel[PolicyListModel]) -> DealModel:
    """List-page callback for the Qinghai Department of Human Resources and
    Social Security (rst.qinghai.gov.cn).

    Reads the page count from the pager, enqueues the remaining pages on
    page 1 (appending the page number to the stored ``page_info`` prefix),
    and turns each list row into an article task.
    """
    result = DealModel()
    para_dicts = callmodel.para_dicts
    task_info = callmodel.redis_all.parse_dict["1_1"].task_info
    info_dicts = {"task_name": callmodel.sql_model.task_name,
                  "task_tag": callmodel.sql_model.task_tag,
                  "task_tag_next": task_info.task_tag_next}
    if "1_1" in para_dicts["data"]:
        max_count = re.findall(r';1/(\d+)&', para_dicts["data"]["1_1"]['html'])
        max_count = int(max_count[0]) if max_count else 1
        total_page = max_count
        page_index = int(callmodel.sql_model.page_index)
        if page_index == 1:
            sql_dict = callmodel.sql_model.dict()
            di_model_bef = DealInsertModel()
            di_model_bef.insert_pre = CoreSqlValue.insert_ig_it
            sql_dict = deal_sql_dict(sql_dict)
            list_json = json.loads(callmodel.sql_model.list_json)
            for page in range(page_index + 1, total_page + 1):
                sql_dict["page"] = total_page
                sql_dict["page_index"] = page
                # page_info is a URL prefix; the page number is appended.
                dic = {"page_info": f"{list_json['page_info']}{page}"}
                sql_dict["list_json"] = json.dumps(dic, ensure_ascii=False)
                di_model_bef.lists.append(sql_dict.copy())
            result.befor_dicts.insert.append(di_model_bef)
        di_model_next = DealInsertModel()
        di_model_next.insert_pre = CoreSqlValue.insert_ig_it
        res = Selector(text=para_dicts["data"]["1_1"]['html'])
        # Two list layouts are supported: "new_jobs_con" <ul> and info rows.
        li_list = res.xpath('//div[@class="new_jobs_con"]/ul/li|//td[@class="info"]/parent::tr[1]')
        for li in li_list:
            temp = info_dicts.copy()
            temp["task_tag"] = temp["task_tag_next"]
            del temp["task_tag_next"]
            article_json = dict()
            href = li.xpath('td[@class="info"]/a/@href|a/@href').extract_first()
            base_url = f'http://rst.qinghai.gov.cn/{callmodel.sql_model.list_rawid}/index.html'
            url = parse.urljoin(base_url, href)
            # Only .htm/.html detail pages are crawlable; skip anything else.
            if 'htm' not in url:
                continue
            # rawid is the file name without its extension.
            rawid = re.findall(r'(.*?)\.', url.split('/')[-1])[0]
            temp["rawid"] = rawid
            temp["sub_db_id"] = '99646'
            article_json["url"] = url
            article_json["title"] = li.xpath('td[@class="info"]/a/text()|a/text()').extract_first().strip()
            article_json["pub_date"] = li.xpath('td[2]/text()|span/text()').extract_first().strip()
            temp["article_json"] = json.dumps(article_json, ensure_ascii=False)
            di_model_next.lists.append(temp)
        result.next_dicts.insert.append(di_model_next)

    return result


def policy_rstqinghaiarticle_callback(callmodel: CallBackModel[PolicyArticleModel]) -> DealModel:
    """Article-stage callback; no extra processing is required here."""
    return DealModel()


def policy_rstqinghaiarticle_etl_callback(callmodel) -> EtlDealModel:
    """ETL callback for rst.qinghai.gov.cn article pages.

    Extracts the title and fulltext from the downloaded HTML, builds a
    metadata row for ``policy_latest`` and a fulltext row for
    ``policy_fulltext_latest``, and records attachment info (from
    ``get_file_info``) into the task row's ``other_dicts`` column.
    Raises a bare ``Exception`` when the fulltext container is missing.
    """
    result = EtlDealModel()
    save_data = list()

    html = callmodel.para_dicts['data']['1_1']['html']
    # Metadata captured at the list stage (url/title/pub_date).
    article_json = json.loads(callmodel.sql_model.article_json)
    title = article_json['title']
    provider_url = article_json['url']
    pub_date = clean_pubdate(article_json['pub_date'])
    pub_year = pub_date[:4]
    res = Selector(text=html)

    # Prefer the on-page title; fall back to the list-stage title.
    title = ''.join(res.xpath('//div[@class="content_tt"]//text()').extract()).strip()
    if not title:
        title = article_json['title'].strip()

    fulltext_xpath = '//div[@class="content_con"]'
    fulltext = res.xpath(fulltext_xpath).extract_first()
    if not fulltext:
        raise Exception

    down_date_str = time.strftime('%Y%m%d_%H%M%S', time.localtime())
    sub_db_id = '99646'
    rawid = callmodel.sql_model.rawid
    lngid = BaseLngid().GetLngid(sub_db_id, rawid)
    product = "RSTQINGHAI"
    zt_provider = "rstqinghaigovpolicy"
    data = init_data(rawid, lngid, sub_db_id, down_date_str, product, zt_provider)
    print(lngid)

    data['title'] = title
    data['provider_url'] = provider_url
    # NOTE(review): pub_date was already passed through clean_pubdate above;
    # this second pass is redundant — assumed idempotent, TODO confirm.
    data['pub_date'] = clean_pubdate(pub_date)
    data['pub_year'] = pub_year
    # data['pub_no'] = pub_no
    # data['organ'] = organ
    # data['index_no'] = index_no
    # data['written_date'] = clean_pubdate(written_date)
    # data['impl_date'] = clean_pubdate(impl_date)
    # data['invalid_date'] = clean_pubdate(invalid_date)
    # data['subject'] = subject
    # data['subject_word'] = subject_word
    # data['legal_status'] = legal_status

    save_data.append({'table': 'policy_latest', 'data': data})
    full_text_data = init_full_text_data(lngid, sub_db_id, down_date_str, fulltext, pub_year)
    save_data.append({'table': 'policy_fulltext_latest', 'data': full_text_data})
    result.save_data = save_data

    # Store attachment info (or an empty JSON object) back onto the task row.
    file_info = get_file_info(data, res, f'({fulltext_xpath})')
    di_model_bef = DealUpdateModel()
    if file_info:
        di_model_bef.update.update({"other_dicts": json.dumps(file_info, ensure_ascii=False)})
    else:
        di_model_bef.update.update({"other_dicts": "{}"})
    di_model_bef.where.update({"rawid": callmodel.sql_model.rawid, "task_tag": callmodel.sql_model.task_tag,
                               "task_name": callmodel.sql_model.task_name})
    result.befor_dicts.update_list.append(di_model_bef)
    return result


#   青海省农业农村厅 (Qinghai Provincial Department of Agriculture and Rural Affairs)
def policy_nynctqinghailist_callback(callmodel: CallBackModel[PolicyListModel]) -> DealModel:
    """List-page callback for nynct.qinghai.gov.cn (JSON list API).

    On page 1 it seeds one list-stage task row per page of the paginated
    listing (befor_dicts); for every page it emits one article-stage task
    row per list entry (next_dicts).
    """
    result = DealModel()
    para_dicts = callmodel.para_dicts
    task_info = callmodel.redis_all.parse_dict["1_1"].task_info
    # Base fields copied into every article task; task_tag_next is the
    # downstream (article) stage's tag.
    info_dicts = {"task_name": callmodel.sql_model.task_name,
                  "task_tag": callmodel.sql_model.task_tag,
                  "task_tag_next": task_info.task_tag_next}
    if "1_1" in para_dicts["data"]:
        # The "html" payload is actually JSON for this site.
        html_json = json.loads(para_dicts["data"]["1_1"]['html'])
        # tcount = html_json['page']['total']
        total_page = html_json['data']['right']['pagenum']
        page_index = int(callmodel.sql_model.page_index)
        if page_index == 1:
            # Only the first page seeds the remaining page tasks, so
            # pagination rows are inserted exactly once per listing.
            sql_dict = callmodel.sql_model.dict()
            di_model_bef = DealInsertModel()
            di_model_bef.insert_pre = CoreSqlValue.insert_ig_it
            sql_dict = deal_sql_dict(sql_dict)
            # Parsed but unused in this variant: list_json is reused verbatim.
            list_json = json.loads(callmodel.sql_model.list_json)
            for page in range(1, total_page + 1):
                sql_dict["page"] = total_page
                sql_dict["page_index"] = page
                # dic = {"page_info": list_json['page_info'].replace(':0', f':{page}')}
                # sql_dict["list_json"] = json.dumps(dic, ensure_ascii=False)
                sql_dict["list_json"] = callmodel.sql_model.list_json
                di_model_bef.lists.append(sql_dict.copy())
            result.befor_dicts.insert.append(di_model_bef)
        di_model_next = DealInsertModel()
        di_model_next.insert_pre = CoreSqlValue.insert_ig_it

        li_list = html_json['data']['right']['list']
        for li in li_list:
            temp = info_dicts.copy()
            # Article rows run under the next stage's tag.
            temp["task_tag"] = temp["task_tag_next"]
            del temp["task_tag_next"]
            article_json = dict()
            # The API item id doubles as the rawid and drives both URLs.
            href = li['id']
            url = f'http://nynct.qinghai.gov.cn/detail/?Id={href}&is_newmodel=1&msgbtn=0'
            art_url = f'http://223.220.143.63:8091/api/nynct.article/text?id={href}&status=0&newmodel=1'
            # rawid = re.findall('(.*?)\.', url.split('/')[-1])[0]
            temp["rawid"] = href
            temp["sub_db_id"] = '99647'
            article_json["url"] = url
            article_json["art_url"] = art_url
            article_json["title"] = li['title']
            article_json["pub_date"] = li['addtime_text']
            temp["article_json"] = json.dumps(article_json, ensure_ascii=False)
            di_model_next.lists.append(temp)
        result.next_dicts.insert.append(di_model_next)

    return result


def policy_nynctqinghaiarticle_callback(callmodel: CallBackModel[PolicyArticleModel]) -> DealModel:
    """Article-stage callback for nynct.qinghai.gov.cn: no follow-up scheduling, return an empty DealModel."""
    return DealModel()


def policy_nynctqinghaiarticle_etl_callback(callmodel) -> EtlDealModel:
    """ETL callback for nynct.qinghai.gov.cn article pages (JSON API).

    Reads the article body from the API response, builds rows for
    ``policy_latest`` / ``policy_fulltext_latest``, and records attachment
    info into the task row's ``other_dicts`` column.  Raises a bare
    ``Exception`` when no title can be found.
    """
    result = EtlDealModel()
    save_data = list()

    # The article endpoint returns JSON; the HTML body sits at data.text.content.
    html_json = json.loads(callmodel.para_dicts['data']['1_1']['html'])
    html = html_json['data']['text']['content']
    article_json = json.loads(callmodel.sql_model.article_json)
    title = article_json['title']
    provider_url = article_json['url']
    pub_date = article_json['pub_date']
    pub_year = pub_date[:4]
    res = Selector(text=html)
    # Prefer the API's title; fall back to the list-stage title.
    title =html_json['data']['text']['title']
    if not title:
        title = article_json['title'].strip()
    if not title:
        raise Exception

    # The API content field is already the full article body.
    fulltext = html

    down_date_str = time.strftime('%Y%m%d_%H%M%S', time.localtime())
    sub_db_id = '99647'
    rawid = callmodel.sql_model.rawid
    lngid = BaseLngid().GetLngid(sub_db_id, rawid)
    product = "NYNCTQINGHAI"
    zt_provider = "nynctqinghaigovpolicy"
    data = init_data(rawid, lngid, sub_db_id, down_date_str, product, zt_provider)
    print(lngid)

    data['title'] = title
    data['provider_url'] = provider_url
    data['pub_date'] = clean_pubdate(pub_date)
    data['pub_year'] = pub_year
    # data['pub_no'] = pub_no
    # data['organ'] = organ
    # data['index_no'] = index_no
    # data['written_date'] = clean_pubdate(written_date)
    # data['impl_date'] = clean_pubdate(impl_date)
    # data['invalid_date'] = clean_pubdate(invalid_date)
    # data['subject'] = subject
    # data['subject_word'] = subject_word
    # data['legal_status'] = legal_status

    save_data.append({'table': 'policy_latest', 'data': data})
    full_text_data = init_full_text_data(lngid, sub_db_id, down_date_str, fulltext, pub_year)
    save_data.append({'table': 'policy_fulltext_latest', 'data': full_text_data})
    result.save_data = save_data

    # No dedicated fulltext container here, so scan the whole body for files.
    file_info = get_file_info(data, res, f'(//body)')
    di_model_bef = DealUpdateModel()
    if file_info:
        di_model_bef.update.update({"other_dicts": json.dumps(file_info, ensure_ascii=False)})
    else:
        di_model_bef.update.update({"other_dicts": "{}"})
    di_model_bef.where.update({"rawid": callmodel.sql_model.rawid, "task_tag": callmodel.sql_model.task_tag,
                               "task_name": callmodel.sql_model.task_name})
    result.befor_dicts.update_list.append(di_model_bef)
    return result


#   青海省住房和城乡建设厅 (Qinghai Provincial Department of Housing and Urban-Rural Development)
def policy_zjtqinghailist_callback(callmodel: CallBackModel[PolicyListModel]) -> DealModel:
    """List-page callback for zjt.qinghai.gov.cn (HTML list pages).

    On page 1 it seeds one list-stage task row per remaining page
    (befor_dicts); for every page it emits one article-stage task row per
    list item (next_dicts).
    """
    result = DealModel()
    para_dicts = callmodel.para_dicts
    task_info = callmodel.redis_all.parse_dict["1_1"].task_info
    # Base fields copied into every article task row.
    info_dicts = {"task_name": callmodel.sql_model.task_name,
                  "task_tag": callmodel.sql_model.task_tag,
                  "task_tag_next": task_info.task_tag_next}
    if "1_1" in para_dicts["data"]:
        # Total page count is scraped from the "共N页" pager text; default 1.
        max_count = re.findall('共(\d+)页&', para_dicts["data"]["1_1"]['html'])
        max_count = int(max_count[0]) if max_count else 1
        total_page = max_count
        page_index = int(callmodel.sql_model.page_index)
        if page_index == 1:
            # Pages are 1-based; page 1 is the current response, so seed 2..N.
            sql_dict = callmodel.sql_model.dict()
            di_model_bef = DealInsertModel()
            di_model_bef.insert_pre = CoreSqlValue.insert_ig_it
            sql_dict = deal_sql_dict(sql_dict)
            # Parsed but unused in this variant: list_json is reused verbatim.
            list_json = json.loads(callmodel.sql_model.list_json)
            for page in range(page_index + 1, total_page + 1):
                sql_dict["page"] = total_page
                sql_dict["page_index"] = page
                # dic = {"page_info": f"{list_json['page_info']}_{page}"}
                # sql_dict["list_json"] = json.dumps(dic, ensure_ascii=False)
                sql_dict["list_json"] = callmodel.sql_model.list_json
                di_model_bef.lists.append(sql_dict.copy())
            result.befor_dicts.insert.append(di_model_bef)
        di_model_next = DealInsertModel()
        di_model_next.insert_pre = CoreSqlValue.insert_ig_it
        res = Selector(text=para_dicts["data"]["1_1"]['html'])
        # if 'cate_id/00' in callmodel.sql_model.list_rawid:
        li_list = res.xpath('//div[@class="lxzd"]/div')
        for li in li_list:
            temp = info_dicts.copy()
            # Article rows run under the next stage's tag.
            temp["task_tag"] = temp["task_tag_next"]
            del temp["task_tag_next"]
            article_json = dict()
            href = li.xpath('a/@href').extract_first()
            # base_url = f'http://fgw.qinghai.gov.cn/{callmodel.sql_model.list_rawid}/index.html'
            base_url = f'http://zjt.qinghai.gov.cn'
            # url = parse.urljoin(base_url, href)
            # Absolute links are kept as-is; relative ones are prefixed.
            if 'http' in href:
                url = href
            else:
                url = base_url + href
            # Only .htm/.html detail pages are crawlable articles.
            if 'htm' not in url:
                continue
            # rawid is the filename stem of the detail page.
            rawid = re.findall('(.*?)\.', url.split('/')[-1])[0]
            # rawid = re.findall('\?id=(.*?)&', url)[0]
            temp["rawid"] = rawid
            temp["sub_db_id"] = '99648'
            article_json["url"] = url
            article_json["title"] = li.xpath('a/span[@class="w1"]/text()').extract_first().strip()
            article_json["pub_date"] = li.xpath('a/span[@class="w2"]/text()').extract_first().strip()
            temp["article_json"] = json.dumps(article_json, ensure_ascii=False)
            di_model_next.lists.append(temp)
        result.next_dicts.insert.append(di_model_next)

    return result


def policy_zjtqinghaiarticle_callback(callmodel: CallBackModel[PolicyArticleModel]) -> DealModel:
    """Article-stage callback for zjt.qinghai.gov.cn: no follow-up scheduling, return an empty DealModel."""
    return DealModel()


def policy_zjtqinghaiarticle_etl_callback(callmodel) -> EtlDealModel:
    """ETL callback for zjt.qinghai.gov.cn article pages.

    Extracts title and fulltext from the HTML, builds rows for
    ``policy_latest`` / ``policy_fulltext_latest``, and records attachment
    info into the task row's ``other_dicts`` column.  Raises a bare
    ``Exception`` when the fulltext container is missing.
    """
    result = EtlDealModel()
    save_data = list()

    html = callmodel.para_dicts['data']['1_1']['html']
    # Metadata captured at the list stage (url/title/pub_date).
    article_json = json.loads(callmodel.sql_model.article_json)
    title = article_json['title']
    provider_url = article_json['url']
    pub_date = clean_pubdate(article_json['pub_date'])
    pub_year = pub_date[:4]
    res = Selector(text=html)

    # Prefer the on-page title; fall back to the list-stage title.
    title = ''.join(res.xpath('//div[@class="zmymb"]//h6//text()').extract()).strip()
    if not title:
        title = article_json['title'].strip()
    # pub_no = ''.join(res.xpath('//div[@class="xlwz"]//span[contains(text(),"发文字号")]/following::span[1]/text()').extract()).strip()
    # index_no = ''.join(res.xpath('//div[@class="xlwz"]//span[contains(text(),"索")]/following::span[1]/text()').extract()).strip()
    # legal_status = ''.join(res.xpath('//div[@class="xlwz"]//span[contains(text(),"公文时效")]/following::span[1]/text()').extract()).strip()
    # subject = ''.join(res.xpath('//div[@class="xlwz"]//span[contains(text(),"主题分类")]/following::span[1]/text()').extract()).strip()
    # subject_word = ''.join(res.xpath('//table[@class="contentTable"]//td[text()="主"]/following::td[1]/text()').extract()).strip()
    # written_date = pub_no = article_json.get('written_date', '')
    # organ = ''.join(res.xpath('//div[@class="xlwz"]//span[contains(text(),"发布机构")]/following::span[1]/text()').extract()).strip()
    # if organ.startswith('省'):
    #     organ = '青海' + organ

    fulltext_xpath = '//div[@class="zmymb"]'
    fulltext = res.xpath(fulltext_xpath).extract_first()
    if not fulltext:
        raise Exception

    down_date_str = time.strftime('%Y%m%d_%H%M%S', time.localtime())
    sub_db_id = '99648'
    rawid = callmodel.sql_model.rawid
    lngid = BaseLngid().GetLngid(sub_db_id, rawid)
    product = "ZJTQINGHAI"
    zt_provider = "zjtqinghaigovpolicy"
    data = init_data(rawid, lngid, sub_db_id, down_date_str, product, zt_provider)
    print(lngid)

    data['title'] = title
    data['provider_url'] = provider_url
    data['pub_date'] = pub_date
    data['pub_year'] = pub_year
    # data['pub_no'] = pub_no
    # data['organ'] = organ
    # data['index_no'] = index_no
    # data['written_date'] = clean_pubdate(written_date)
    # data['impl_date'] = clean_pubdate(impl_date)
    # data['invalid_date'] = clean_pubdate(invalid_date)
    # data['subject'] = subject
    # data['subject_word'] = subject_word
    # data['legal_status'] = legal_status

    save_data.append({'table': 'policy_latest', 'data': data})
    full_text_data = init_full_text_data(lngid, sub_db_id, down_date_str, fulltext, pub_year)
    save_data.append({'table': 'policy_fulltext_latest', 'data': full_text_data})
    result.save_data = save_data

    # Store attachment info (or an empty JSON object) back onto the task row.
    file_info = get_file_info(data, res, f'({fulltext_xpath})')
    di_model_bef = DealUpdateModel()
    if file_info:
        di_model_bef.update.update({"other_dicts": json.dumps(file_info, ensure_ascii=False)})
    else:
        di_model_bef.update.update({"other_dicts": "{}"})
    di_model_bef.where.update({"rawid": callmodel.sql_model.rawid, "task_tag": callmodel.sql_model.task_tag,
                               "task_name": callmodel.sql_model.task_name})
    result.befor_dicts.update_list.append(di_model_bef)
    return result


#   青海省卫生健康委员会 (Qinghai Provincial Health Commission)
def policy_wsjkwqinghailist_callback(callmodel: CallBackModel[PolicyListModel]) -> DealModel:
    """List-page callback for wsjkw.qinghai.gov.cn (HTML list pages).

    On page 0 it seeds one list-stage task row per remaining page
    (befor_dicts); for every page it emits one article-stage task row per
    list item (next_dicts).
    """
    result = DealModel()
    para_dicts = callmodel.para_dicts
    task_info = callmodel.redis_all.parse_dict["1_1"].task_info
    # Base fields copied into every article task row.
    info_dicts = {"task_name": callmodel.sql_model.task_name,
                  "task_tag": callmodel.sql_model.task_tag,
                  "task_tag_next": task_info.task_tag_next}
    if "1_1" in para_dicts["data"]:
        # Total page count is scraped from a ">/N" pager fragment; default 1.
        max_count = re.findall('>/(\d+)', para_dicts["data"]["1_1"]['html'])
        max_count = int(max_count[0]) if max_count else 1
        total_page = max_count
        page_index = int(callmodel.sql_model.page_index)
        if page_index == 0:
            # Pages appear to be 0-based here: page 0 is the current response.
            sql_dict = callmodel.sql_model.dict()
            di_model_bef = DealInsertModel()
            di_model_bef.insert_pre = CoreSqlValue.insert_ig_it
            sql_dict = deal_sql_dict(sql_dict)
            list_json = json.loads(callmodel.sql_model.list_json)
            # NOTE(review): range excludes total_page itself — confirm the
            # site's pages run 0..total_page-1 so the last page isn't dropped.
            for page in range(page_index + 1, total_page):
                sql_dict["page"] = total_page
                sql_dict["page_index"] = page
                # Page number is appended to the list-stage page_info prefix.
                dic = {"page_info": f"{list_json['page_info']}{page}"}
                sql_dict["list_json"] = json.dumps(dic, ensure_ascii=False)
                # sql_dict["list_json"] = callmodel.sql_model.list_json
                di_model_bef.lists.append(sql_dict.copy())
            result.befor_dicts.insert.append(di_model_bef)
        di_model_next = DealInsertModel()
        di_model_next.insert_pre = CoreSqlValue.insert_ig_it
        res = Selector(text=para_dicts["data"]["1_1"]['html'])
        # if 'cate_id/00' in callmodel.sql_model.list_rawid:
        li_list = res.xpath('//li[@class="line01"]|//li[@class="xxgk_content_title"]')
        for li in li_list:
            temp = info_dicts.copy()
            # Article rows run under the next stage's tag.
            temp["task_tag"] = temp["task_tag_next"]
            del temp["task_tag_next"]
            article_json = dict()
            href = li.xpath('a[2]/@href').extract_first()
            # base_url = f'http://fgw.qinghai.gov.cn/{callmodel.sql_model.list_rawid}/index.html'
            base_url = f'https://wsjkw.qinghai.gov.cn'
            # url = parse.urljoin(base_url, href)
            # Absolute links are kept as-is; relative ones are prefixed.
            if 'http' in href:
                url = href
            else:
                url = base_url + href
            # Only .htm/.html detail pages are crawlable articles.
            if 'htm' not in url:
                continue
            # rawid is the filename stem of the detail page.
            rawid = re.findall('(.*?)\.', url.split('/')[-1])[0]
            # rawid = re.findall('\?id=(.*?)&', url)[0]
            temp["rawid"] = rawid
            temp["sub_db_id"] = '99649'
            article_json["url"] = url
            article_json["title"] = li.xpath('a[2]/text()').extract_first().strip()
            article_json["pub_date"] = li.xpath('span/text()').extract_first().strip()
            temp["article_json"] = json.dumps(article_json, ensure_ascii=False)
            di_model_next.lists.append(temp)
        result.next_dicts.insert.append(di_model_next)

    return result


def policy_wsjkwqinghaiarticle_callback(callmodel: CallBackModel[PolicyArticleModel]) -> DealModel:
    """Article-stage callback for wsjkw.qinghai.gov.cn: no follow-up scheduling, return an empty DealModel."""
    return DealModel()


def policy_wsjkwqinghaiarticle_etl_callback(callmodel) -> EtlDealModel:
    """ETL callback for wsjkw.qinghai.gov.cn article pages.

    Extracts title, document metadata (pub_no, index_no, legal_status,
    subject, organ) and fulltext from the HTML, builds rows for
    ``policy_latest`` / ``policy_fulltext_latest``, and records attachment
    info into the task row's ``other_dicts`` column.  Raises a bare
    ``Exception`` when the fulltext container is missing.
    """
    result = EtlDealModel()
    save_data = list()

    html = callmodel.para_dicts['data']['1_1']['html']
    # Metadata captured at the list stage (url/title/pub_date).
    article_json = json.loads(callmodel.sql_model.article_json)
    title = article_json['title']
    provider_url = article_json['url']
    pub_date = clean_pubdate(article_json['pub_date'])
    pub_year = pub_date[:4]
    res = Selector(text=html)

    # Prefer the on-page title; fall back to the list-stage title.
    title = ''.join(res.xpath('//p[@class="text_title"]//text()').extract()).strip()
    if not title:
        title = article_json['title'].strip()
    # Document metadata lives in labelled <span> pairs inside div.xlwz.
    pub_no = ''.join(res.xpath('//div[@class="xlwz"]//span[contains(text(),"发文字号")]/following::span[1]/text()').extract()).strip()
    index_no = ''.join(res.xpath('//div[@class="xlwz"]//span[contains(text(),"索")]/following::span[1]/text()').extract()).strip()
    legal_status = ''.join(res.xpath('//div[@class="xlwz"]//span[contains(text(),"公文时效")]/following::span[1]/text()').extract()).strip()
    subject = ''.join(res.xpath('//div[@class="xlwz"]//span[contains(text(),"主题分类")]/following::span[1]/text()').extract()).strip()
    # subject_word = ''.join(res.xpath('//table[@class="contentTable"]//td[text()="主"]/following::td[1]/text()').extract()).strip()
    # written_date = pub_no = article_json.get('written_date', '')
    organ = ''.join(res.xpath('//div[@class="xlwz"]//span[contains(text(),"发布机构")]/following::span[1]/text()').extract()).strip()
    # Qualify issuing organs written as "省..." with the province name.
    if organ.startswith('省'):
        organ = '青海' + organ

    fulltext_xpath = '//div[@class="page_text"]'
    fulltext = res.xpath(fulltext_xpath).extract_first()
    if not fulltext:
        raise Exception

    down_date_str = time.strftime('%Y%m%d_%H%M%S', time.localtime())
    sub_db_id = '99649'
    rawid = callmodel.sql_model.rawid
    lngid = BaseLngid().GetLngid(sub_db_id, rawid)
    product = "WSJKWQINGHAI"
    zt_provider = "wsjkwqinghaigovpolicy"
    data = init_data(rawid, lngid, sub_db_id, down_date_str, product, zt_provider)
    print(lngid)

    data['title'] = title
    data['provider_url'] = provider_url
    data['pub_date'] = pub_date
    data['pub_year'] = pub_year
    data['pub_no'] = pub_no
    data['organ'] = organ
    data['index_no'] = index_no
    # data['written_date'] = clean_pubdate(written_date)
    # data['impl_date'] = clean_pubdate(impl_date)
    # data['invalid_date'] = clean_pubdate(invalid_date)
    data['subject'] = subject
    # data['subject_word'] = subject_word
    data['legal_status'] = legal_status

    save_data.append({'table': 'policy_latest', 'data': data})
    full_text_data = init_full_text_data(lngid, sub_db_id, down_date_str, fulltext, pub_year)
    save_data.append({'table': 'policy_fulltext_latest', 'data': full_text_data})
    result.save_data = save_data

    # Store attachment info (or an empty JSON object) back onto the task row.
    file_info = get_file_info(data, res, f'({fulltext_xpath})')
    di_model_bef = DealUpdateModel()
    if file_info:
        di_model_bef.update.update({"other_dicts": json.dumps(file_info, ensure_ascii=False)})
    else:
        di_model_bef.update.update({"other_dicts": "{}"})
    di_model_bef.where.update({"rawid": callmodel.sql_model.rawid, "task_tag": callmodel.sql_model.task_tag,
                               "task_name": callmodel.sql_model.task_name})
    result.befor_dicts.update_list.append(di_model_bef)
    return result


#   青海省西宁市 (Xining City, Qinghai Province)
def policy_xininglist_callback(callmodel: CallBackModel[PolicyListModel]) -> DealModel:
    """List-page callback for www.xining.gov.cn (HTML list pages).

    On page 0 it seeds one list-stage task row per remaining page
    (befor_dicts).  Two list layouts exist: notice lists under
    ``xwdt/tzgg`` (card layout with split date) and the default
    news-list layout; each item becomes an article-stage task row
    (next_dicts).
    """
    result = DealModel()
    para_dicts = callmodel.para_dicts
    task_info = callmodel.redis_all.parse_dict["1_1"].task_info
    # Base fields copied into every article task row.
    info_dicts = {"task_name": callmodel.sql_model.task_name,
                  "task_tag": callmodel.sql_model.task_tag,
                  "task_tag_next": task_info.task_tag_next}
    if "1_1" in para_dicts["data"]:
        # Total page count comes from the createPage(N, ...) JS call; default 1.
        max_count = re.findall('createPage\((\d+)', para_dicts["data"]["1_1"]['html'])
        max_count = int(max_count[0]) if max_count else 1
        total_page = max_count
        page_index = int(callmodel.sql_model.page_index)
        if page_index == 0:
            # Pages appear to be 0-based here: page 0 is the current response.
            sql_dict = callmodel.sql_model.dict()
            di_model_bef = DealInsertModel()
            di_model_bef.insert_pre = CoreSqlValue.insert_ig_it
            sql_dict = deal_sql_dict(sql_dict)
            list_json = json.loads(callmodel.sql_model.list_json)
            # NOTE(review): range excludes total_page itself — confirm the
            # site's pages run 0..total_page-1 so the last page isn't dropped.
            for page in range(page_index + 1, total_page):
                sql_dict["page"] = total_page
                sql_dict["page_index"] = page
                # Page number is suffixed to the page_info prefix with "_".
                dic = {"page_info": f"{list_json['page_info']}_{page}"}
                sql_dict["list_json"] = json.dumps(dic, ensure_ascii=False)
                # sql_dict["list_json"] = callmodel.sql_model.list_json
                di_model_bef.lists.append(sql_dict.copy())
            result.befor_dicts.insert.append(di_model_bef)
        di_model_next = DealInsertModel()
        di_model_next.insert_pre = CoreSqlValue.insert_ig_it
        res = Selector(text=para_dicts["data"]["1_1"]['html'])
        if 'xwdt/tzgg' in callmodel.sql_model.list_rawid:
            # Notice ("通知公告") layout: card items with a split day / year-month date.
            li_list = res.xpath('//div[@class="gl-list"]/li')
            for li in li_list:
                temp = info_dicts.copy()
                # Article rows run under the next stage's tag.
                temp["task_tag"] = temp["task_tag_next"]
                del temp["task_tag_next"]
                article_json = dict()
                href = li.xpath('a/@href').extract_first()
                base_url = f'https://www.xining.gov.cn/{callmodel.sql_model.list_rawid}/index.html'
                # base_url = f'https://wsjkw.qinghai.gov.cn'
                url = parse.urljoin(base_url, href)
                # if 'http' in href:
                #     url = href
                # else:
                #     url = base_url + href
                # Only .htm/.html detail pages are crawlable articles.
                if 'htm' not in url:
                    continue
                # rawid is the filename stem of the detail page.
                rawid = re.findall('(.*?)\.', url.split('/')[-1])[0]
                # rawid = re.findall('\?id=(.*?)&', url)[0]
                temp["rawid"] = rawid
                temp["sub_db_id"] = '99650'
                article_json["url"] = url
                article_json["title"] = li.xpath('a/div[@class="info rt"]/p[@class="tit"]/text()').extract_first().strip()
                # Date is rendered as separate year-month and day fragments.
                day = li.xpath('a/div[@class="date lf"]/p[@class="day"]/text()').extract_first().strip()
                ym = li.xpath('a/div[@class="date lf"]/p[@class="ym"]/text()').extract_first().strip()
                article_json["pub_date"] = ym + day
                temp["article_json"] = json.dumps(article_json, ensure_ascii=False)
                di_model_next.lists.append(temp)
        else:
            # Default news-list layout.
            li_list = res.xpath('//ul[@class="cm-news-list"]/li|//div[@class="tabItem"]/ul/li')
            for li in li_list:
                temp = info_dicts.copy()
                temp["task_tag"] = temp["task_tag_next"]
                del temp["task_tag_next"]
                article_json = dict()
                href = li.xpath('div/a/@href|a/@href').extract_first()
                base_url = f'https://www.xining.gov.cn/{callmodel.sql_model.list_rawid}/index.html'
                # base_url = f'https://wsjkw.qinghai.gov.cn'
                url = parse.urljoin(base_url, href)
                # if 'http' in href:
                #     url = href
                # else:
                #     url = base_url + href
                if 'htm' not in url:
                    continue
                rawid = re.findall('(.*?)\.', url.split('/')[-1])[0]
                # rawid = re.findall('\?id=(.*?)&', url)[0]
                temp["rawid"] = rawid
                temp["sub_db_id"] = '99650'
                article_json["url"] = url
                article_json["title"] = li.xpath('div/a/text()|a/text()').extract_first().strip()
                article_json["pub_date"] = li.xpath('span[last()]/text()').extract_first().strip()
                temp["article_json"] = json.dumps(article_json, ensure_ascii=False)
                di_model_next.lists.append(temp)
        result.next_dicts.insert.append(di_model_next)

    return result


def policy_xiningarticle_callback(callmodel: CallBackModel[PolicyArticleModel]) -> DealModel:
    """Article-stage callback for www.xining.gov.cn: no follow-up scheduling, return an empty DealModel."""
    return DealModel()


def policy_xiningarticle_etl_callback(callmodel) -> EtlDealModel:
    """ETL callback for www.xining.gov.cn article pages.

    Extracts title, document metadata (pub_no, index_no, legal_status,
    subject, organ) and fulltext from the HTML, builds rows for
    ``policy_latest`` / ``policy_fulltext_latest``, and records attachment
    info into the task row's ``other_dicts`` column.  Raises a bare
    ``Exception`` when the fulltext container is missing.
    """
    result = EtlDealModel()
    save_data = list()

    html = callmodel.para_dicts['data']['1_1']['html']
    # Metadata captured at the list stage (url/title/pub_date).
    article_json = json.loads(callmodel.sql_model.article_json)
    title = article_json['title']
    provider_url = article_json['url']
    pub_date = clean_pubdate(article_json['pub_date'])
    pub_year = pub_date[:4]
    res = Selector(text=html)

    # Prefer the on-page title; fall back to the list-stage title.
    title = ''.join(res.xpath('//h1[@class="xx-tit f-tac"]//text()').extract()).strip()
    if not title:
        title = article_json['title'].strip()
    # Document metadata lives in a labelled th/td table (szf_zw-table).
    pub_no = ''.join(res.xpath('//table[@class="szf_zw-table"]//th[contains(text(),"文 号")]/following::td[1]/text()').extract()).strip()
    index_no = ''.join(res.xpath('//table[@class="szf_zw-table"]//th[contains(text(),"索引号")]/following::td[1]/text()').extract()).strip()
    legal_status = ''.join(res.xpath('//table[@class="szf_zw-table"]//th[contains(text(),"效力状态")]/following::td[1]/text()').extract()).strip()
    subject = ''.join(res.xpath('//table[@class="szf_zw-table"]//th[contains(text(),"主题分类")]/following::td[1]/text()').extract()).strip()
    # subject_word = ''.join(res.xpath('//table[@class="contentTable"]//td[text()="主"]/following::td[1]/text()').extract()).strip()
    # written_date = pub_no = article_json.get('written_date', '')
    organ = ''.join(res.xpath('//table[@class="szf_zw-table"]//th[contains(text(),"发布机构")]/following::td[1]/text()').extract()).strip()
    # Qualify issuing organs written as "市..." with the city name.
    if organ.startswith('市'):
        organ = '西宁' + organ

    fulltext_xpath = '//div[@id="wenzhangzhengwen"]'
    fulltext = res.xpath(fulltext_xpath).extract_first()
    if not fulltext:
        raise Exception

    down_date_str = time.strftime('%Y%m%d_%H%M%S', time.localtime())
    sub_db_id = '99650'
    rawid = callmodel.sql_model.rawid
    lngid = BaseLngid().GetLngid(sub_db_id, rawid)
    product = "XINING"
    zt_provider = "xininggovpolicy"
    data = init_data(rawid, lngid, sub_db_id, down_date_str, product, zt_provider)
    print(lngid)

    data['title'] = title
    data['provider_url'] = provider_url
    data['pub_date'] = pub_date
    data['pub_year'] = pub_year
    data['pub_no'] = pub_no
    data['organ'] = organ
    data['index_no'] = index_no
    # data['written_date'] = clean_pubdate(written_date)
    # data['impl_date'] = clean_pubdate(impl_date)
    # data['invalid_date'] = clean_pubdate(invalid_date)
    data['subject'] = subject
    # data['subject_word'] = subject_word
    data['legal_status'] = legal_status

    save_data.append({'table': 'policy_latest', 'data': data})
    full_text_data = init_full_text_data(lngid, sub_db_id, down_date_str, fulltext, pub_year)
    save_data.append({'table': 'policy_fulltext_latest', 'data': full_text_data})
    result.save_data = save_data

    # Store attachment info (or an empty JSON object) back onto the task row.
    file_info = get_file_info(data, res, f'({fulltext_xpath})')
    di_model_bef = DealUpdateModel()
    if file_info:
        di_model_bef.update.update({"other_dicts": json.dumps(file_info, ensure_ascii=False)})
    else:
        di_model_bef.update.update({"other_dicts": "{}"})
    di_model_bef.where.update({"rawid": callmodel.sql_model.rawid, "task_tag": callmodel.sql_model.task_tag,
                               "task_name": callmodel.sql_model.task_name})
    result.befor_dicts.update_list.append(di_model_bef)
    return result


#   青海省海东市 (Haidong City, Qinghai Province)
def policy_haidonglist_callback(callmodel: CallBackModel[PolicyListModel]) -> DealModel:
    """List-page callback for www.haidong.gov.cn (HTML list pages).

    On page 1 it seeds one list-stage task row per remaining page
    (befor_dicts).  Three list layouts are handled, selected by the
    ``lmid`` query parameter in list_rawid; each item becomes an
    article-stage task row (next_dicts).
    """
    result = DealModel()
    para_dicts = callmodel.para_dicts
    task_info = callmodel.redis_all.parse_dict["1_1"].task_info
    # Base fields copied into every article task row.
    info_dicts = {"task_name": callmodel.sql_model.task_name,
                  "task_tag": callmodel.sql_model.task_tag,
                  "task_tag_next": task_info.task_tag_next}
    if "1_1" in para_dicts["data"]:
        # Total page count is scraped from the ">共N页" pager text; default 1.
        max_count = re.findall('>共(\d+)页', para_dicts["data"]["1_1"]['html'])
        max_count = int(max_count[0]) if max_count else 1
        total_page = max_count
        page_index = int(callmodel.sql_model.page_index)
        if page_index == 1:
            # Pages are 1-based; page 1 is the current response, so seed 2..N.
            sql_dict = callmodel.sql_model.dict()
            di_model_bef = DealInsertModel()
            di_model_bef.insert_pre = CoreSqlValue.insert_ig_it
            sql_dict = deal_sql_dict(sql_dict)
            # Parsed but unused in this variant: list_json is reused verbatim.
            list_json = json.loads(callmodel.sql_model.list_json)
            for page in range(page_index + 1, total_page + 1):
                sql_dict["page"] = total_page
                sql_dict["page_index"] = page
                # dic = {"page_info": f"{list_json['page_info']}_{page}"}
                # sql_dict["list_json"] = json.dumps(dic, ensure_ascii=False)
                sql_dict["list_json"] = callmodel.sql_model.list_json
                di_model_bef.lists.append(sql_dict.copy())
            result.befor_dicts.insert.append(di_model_bef)
        di_model_next = DealInsertModel()
        di_model_next.insert_pre = CoreSqlValue.insert_ig_it
        res = Selector(text=para_dicts["data"]["1_1"]['html'])
        if 'lmid=26' in callmodel.sql_model.list_rawid:
            # Layout 1: tr.lxzd rows — link in td[2], date in td[5].
            li_list = res.xpath('//tr[@class="lxzd"]')
            for li in li_list:
                temp = info_dicts.copy()
                # Article rows run under the next stage's tag.
                temp["task_tag"] = temp["task_tag_next"]
                del temp["task_tag_next"]
                article_json = dict()
                href = li.xpath('td[2]/a/@href').extract_first()
                if not href:
                    continue
                base_url = f'http://www.haidong.gov.cn/{callmodel.sql_model.list_rawid}'
                url = parse.urljoin(base_url, href)
                # Only .htm/.html detail pages are crawlable articles.
                if 'htm' not in url:
                    continue
                # rawid is the filename stem of the detail page.
                rawid = re.findall('(.*?)\.', url.split('/')[-1])[0]
                temp["rawid"] = rawid
                temp["sub_db_id"] = '99651'
                article_json["url"] = url
                article_json["title"] = li.xpath('td[2]/a/text()').extract_first().strip()
                article_json["pub_date"] = li.xpath('td[5]/text()').extract_first().strip()
                temp["article_json"] = json.dumps(article_json, ensure_ascii=False)
                di_model_next.lists.append(temp)
        elif 'lmid=23' in callmodel.sql_model.list_rawid or 'lmid=382' in callmodel.sql_model.list_rawid:
            # Layout 2: table rows — link in td[1], date in td[2].
            li_list = res.xpath('//td[@class="heizi12"]/table/tr')
            for li in li_list:
                temp = info_dicts.copy()
                temp["task_tag"] = temp["task_tag_next"]
                del temp["task_tag_next"]
                article_json = dict()
                href = li.xpath('td[1]/a/@href').extract_first()
                if not href:
                    continue
                base_url = f'http://www.haidong.gov.cn/{callmodel.sql_model.list_rawid}'
                url = parse.urljoin(base_url, href)
                if 'htm' not in url:
                    continue
                rawid = re.findall('(.*?)\.', url.split('/')[-1])[0]
                temp["rawid"] = rawid
                temp["sub_db_id"] = '99651'
                article_json["url"] = url
                article_json["title"] = li.xpath('td[1]/a/text()').extract_first().strip()
                article_json["pub_date"] = li.xpath('td[2]/text()').extract_first().strip()
                temp["article_json"] = json.dumps(article_json, ensure_ascii=False)
                di_model_next.lists.append(temp)
        else:
            # Default layout: skip the header row; link in td[2], date in td[3].
            li_list = res.xpath('//td[@class="heizi12"]/table/tr')[1:]
            for li in li_list:
                temp = info_dicts.copy()
                temp["task_tag"] = temp["task_tag_next"]
                del temp["task_tag_next"]
                article_json = dict()
                href = li.xpath('td[2]/a/@href').extract_first()
                if not href:
                    continue
                base_url = f'http://www.haidong.gov.cn/{callmodel.sql_model.list_rawid}'
                url = parse.urljoin(base_url, href)
                if 'htm' not in url:
                    continue
                rawid = re.findall('(.*?)\.', url.split('/')[-1])[0]
                temp["rawid"] = rawid
                temp["sub_db_id"] = '99651'
                article_json["url"] = url
                article_json["title"] = li.xpath('td[2]/a/text()').extract_first().strip()
                article_json["pub_date"] = li.xpath('td[3]/text()').extract_first().strip()
                temp["article_json"] = json.dumps(article_json, ensure_ascii=False)
                di_model_next.lists.append(temp)
        result.next_dicts.insert.append(di_model_next)

    return result


def policy_haidongarticle_callback(callmodel: CallBackModel[PolicyArticleModel]) -> DealModel:
    """No-op article handler for Haidong; all parsing happens in the ETL step."""
    return DealModel()


def policy_haidongarticle_etl_callback(callmodel) -> EtlDealModel:
    """ETL for Haidong (Qinghai) policy article pages.

    Parses title/metadata and the fulltext block out of the crawled HTML,
    stages rows for the ``policy_latest`` and ``policy_fulltext_latest``
    tables, and writes attachment info back onto the source crawl row.

    Raises:
        Exception: when no fulltext container is found in the page.
    """
    result = EtlDealModel()
    save_data = list()

    html = callmodel.para_dicts['data']['1_1']['html']
    article_json = json.loads(callmodel.sql_model.article_json)
    provider_url = article_json['url']
    pub_date = clean_pubdate(article_json['pub_date'])
    pub_year = pub_date[:4]
    res = Selector(text=html)

    # Prefer the title rendered on the article page; fall back to the
    # title that was captured on the list page.
    title = ''.join(res.xpath('//td[@class="szfl_zi3"]//text()|//td[@class="top_zi"]//text()').extract()).strip()
    if not title:
        title = article_json['title'].strip()
    pub_no = ''.join(res.xpath('//td[contains(text(),"发文字号：")]/following::td[1]/text()').extract()).strip()
    index_no = ''.join(res.xpath('//td[contains(text(),"索引号：")]/following::td[1]/text()').extract()).strip()
    subject = ''.join(res.xpath('//td[contains(text(),"主题分类：")]/following::td[1]/text()').extract()).strip()
    written_date = ''.join(res.xpath('//td[contains(text(),"成文日期：")]/following::td[1]/text()').extract()).strip()
    organ = ''.join(res.xpath('//td[contains(text(),"发文机关：")]/following::td[1]/text()').extract()).strip()
    # Pages abbreviate the issuing organ ("市..."); prefix the city name.
    if organ.startswith('市'):
        organ = '海东' + organ

    fulltext_xpath = '//p/parent::*[@class="heizi12"]|//div/parent::*[@class="heizi12"]|//font[@id="Zoom"]'
    fulltext = res.xpath(fulltext_xpath).extract_first()
    if not fulltext:
        # Fail loudly with context so the broken page can be located.
        raise Exception(f'fulltext not found: {provider_url}')

    down_date_str = time.strftime('%Y%m%d_%H%M%S', time.localtime())
    sub_db_id = '99651'
    rawid = callmodel.sql_model.rawid
    lngid = BaseLngid().GetLngid(sub_db_id, rawid)
    product = "HAIDONG"
    zt_provider = "haidonggovpolicy"
    data = init_data(rawid, lngid, sub_db_id, down_date_str, product, zt_provider)

    data['title'] = title
    data['provider_url'] = provider_url
    data['pub_date'] = pub_date
    data['pub_year'] = pub_year
    data['pub_no'] = pub_no
    data['organ'] = organ
    data['index_no'] = index_no
    data['written_date'] = clean_pubdate(written_date)
    data['subject'] = subject

    save_data.append({'table': 'policy_latest', 'data': data})
    full_text_data = init_full_text_data(lngid, sub_db_id, down_date_str, fulltext, pub_year)
    save_data.append({'table': 'policy_fulltext_latest', 'data': full_text_data})
    result.save_data = save_data

    # Record attachment info (if any) back onto the crawl row.
    file_info = get_file_info(data, res, f'({fulltext_xpath})')
    di_model_bef = DealUpdateModel()
    di_model_bef.update.update({"other_dicts": json.dumps(file_info, ensure_ascii=False) if file_info else "{}"})
    di_model_bef.where.update({"rawid": callmodel.sql_model.rawid, "task_tag": callmodel.sql_model.task_tag,
                               "task_name": callmodel.sql_model.task_name})
    result.befor_dicts.update_list.append(di_model_bef)
    return result


#   青海省海北藏族自治州 (Haibei Tibetan Autonomous Prefecture, Qinghai)
def policy_haibeilist_callback(callmodel: CallBackModel[PolicyListModel]) -> DealModel:
    """List-page handler for Haibei prefecture (Qinghai).

    On the first page, schedules the remaining list pages for crawling;
    on every page, extracts article links and queues them for the
    article stage.
    """
    result = DealModel()
    para_dicts = callmodel.para_dicts
    task_info = callmodel.redis_all.parse_dict["1_1"].task_info
    info_dicts = {"task_name": callmodel.sql_model.task_name,
                  "task_tag": callmodel.sql_model.task_tag,
                  "task_tag_next": task_info.task_tag_next}
    if "1_1" in para_dicts["data"]:
        # Raw strings avoid invalid "\d" escape warnings in regexes.
        max_count = re.findall(r'pageCount:(\d+)', para_dicts["data"]["1_1"]['html'])
        total_page = int(max_count[0]) if max_count else 1
        page_index = int(callmodel.sql_model.page_index)
        if page_index == 1:
            # First page: enqueue the remaining list pages (2..total_page).
            sql_dict = callmodel.sql_model.dict()
            di_model_bef = DealInsertModel()
            di_model_bef.insert_pre = CoreSqlValue.insert_ig_it
            sql_dict = deal_sql_dict(sql_dict)
            for page in range(page_index + 1, total_page + 1):
                sql_dict["page"] = total_page
                sql_dict["page_index"] = page
                sql_dict["list_json"] = callmodel.sql_model.list_json
                di_model_bef.lists.append(sql_dict.copy())
            result.befor_dicts.insert.append(di_model_bef)
        di_model_next = DealInsertModel()
        di_model_next.insert_pre = CoreSqlValue.insert_ig_it
        res = Selector(text=para_dicts["data"]["1_1"]['html'])
        li_list = res.xpath('//div[@class="navjz clearfix"]/ul/li|//div[@class="xxgk_nav_con"]/ul/li')
        for li in li_list:
            temp = info_dicts.copy()
            temp["task_tag"] = temp["task_tag_next"]
            del temp["task_tag_next"]
            article_json = dict()
            href = li.xpath('a[1]/@href').extract_first()
            if not href:
                continue
            base_url = f'http://www.haibei.gov.cn/{callmodel.sql_model.list_rawid}pageIndex=1'
            url = parse.urljoin(base_url, href)
            if 'htm' not in url:
                continue
            # rawid is the article filename without its extension.
            rawid = re.findall(r'(.*?)\.', url.split('/')[-1])[0]
            temp["rawid"] = rawid
            temp["sub_db_id"] = '99652'
            article_json["url"] = url
            article_json["title"] = li.xpath('a[1]/@title').extract_first().strip()
            article_json["pub_date"] = li.xpath('span[contains(@class,"date")]/text()').extract_first().strip()
            temp["article_json"] = json.dumps(article_json, ensure_ascii=False)
            di_model_next.lists.append(temp)
        result.next_dicts.insert.append(di_model_next)

    return result


def policy_haibeiarticle_callback(callmodel: CallBackModel[PolicyArticleModel]) -> DealModel:
    """No-op article handler for Haibei; all parsing happens in the ETL step."""
    return DealModel()


def policy_haibeiarticle_etl_callback(callmodel) -> EtlDealModel:
    """ETL for Haibei prefecture (Qinghai) policy article pages.

    Parses title/metadata and the fulltext block out of the crawled HTML,
    stages rows for the ``policy_latest`` and ``policy_fulltext_latest``
    tables, and writes attachment info back onto the source crawl row.

    Raises:
        Exception: when no fulltext container is found in the page.
    """
    result = EtlDealModel()
    save_data = list()

    html = callmodel.para_dicts['data']['1_1']['html']
    article_json = json.loads(callmodel.sql_model.article_json)
    provider_url = article_json['url']
    pub_date = clean_pubdate(article_json['pub_date'])
    pub_year = pub_date[:4]
    res = Selector(text=html)

    # Prefer the title rendered on the article page; fall back to the
    # title that was captured on the list page.
    title = ''.join(res.xpath('//h1[@class="newstitle"]//text()').extract()).strip()
    if not title:
        title = article_json['title'].strip()
    pub_no = ''.join(res.xpath('//table[@class="table_suoyin hidden-xs"]//th[text()="文"]/following::td[1]/text()').extract()).strip()
    index_no = ''.join(res.xpath('//table[@class="table_suoyin hidden-xs"]//th[text()="索"]/following::td[1]/text()').extract()).strip()
    legal_status = ''.join(res.xpath('//table[@class="table_suoyin hidden-xs"]//th[text()="有"]/following::td[1]/text()').extract()).strip()
    subject = ''.join(res.xpath('//table[@class="table_suoyin hidden-xs"]//th[text()="内容分类："]/following::td[1]/text()').extract()).strip()
    subject_word = ''.join(res.xpath('//table[@class="table_suoyin hidden-xs"]//th[text()="关"]/following::td[1]/text()').extract()).strip()
    written_date = ''.join(res.xpath('//table[@class="table_suoyin hidden-xs"]//th[text()="成文日期："]/following::td[1]/text()').extract()).strip()
    invalid_date = ''.join(res.xpath('//table[@class="table_suoyin hidden-xs"]//th[text()="废止时间："]/following::td[1]/text()').extract()).strip()
    # "暂无" means "none yet" — treat it as no invalidation date.
    invalid_date = '' if invalid_date == '暂无' else invalid_date
    organ = ''.join(res.xpath('//table[@class="table_suoyin hidden-xs"]//th[text()="发布机构："]/following::td[1]/text()').extract()).strip()
    # Pages abbreviate the issuing organ ("州..."); prefix the prefecture name.
    if organ.startswith('州'):
        organ = '青海省海北藏族自治' + organ

    fulltext_xpath = '//div[contains(@class,"j-fontContent")]'
    fulltext = res.xpath(fulltext_xpath).extract_first()
    if not fulltext:
        # Fail loudly with context so the broken page can be located.
        raise Exception(f'fulltext not found: {provider_url}')

    down_date_str = time.strftime('%Y%m%d_%H%M%S', time.localtime())
    sub_db_id = '99652'
    rawid = callmodel.sql_model.rawid
    lngid = BaseLngid().GetLngid(sub_db_id, rawid)
    product = "HAIBEI"
    zt_provider = "haibeigovpolicy"
    data = init_data(rawid, lngid, sub_db_id, down_date_str, product, zt_provider)

    data['title'] = title
    data['provider_url'] = provider_url
    data['pub_date'] = pub_date
    data['pub_year'] = pub_year
    data['pub_no'] = pub_no
    data['organ'] = organ
    data['index_no'] = index_no
    data['written_date'] = clean_pubdate(written_date)
    data['invalid_date'] = clean_pubdate(invalid_date)
    data['subject'] = subject
    data['subject_word'] = subject_word
    data['legal_status'] = legal_status

    save_data.append({'table': 'policy_latest', 'data': data})
    full_text_data = init_full_text_data(lngid, sub_db_id, down_date_str, fulltext, pub_year)
    save_data.append({'table': 'policy_fulltext_latest', 'data': full_text_data})
    result.save_data = save_data

    # Record attachment info (if any) back onto the crawl row.
    file_info = get_file_info(data, res, f'({fulltext_xpath})')
    di_model_bef = DealUpdateModel()
    di_model_bef.update.update({"other_dicts": json.dumps(file_info, ensure_ascii=False) if file_info else "{}"})
    di_model_bef.where.update({"rawid": callmodel.sql_model.rawid, "task_tag": callmodel.sql_model.task_tag,
                               "task_name": callmodel.sql_model.task_name})
    result.befor_dicts.update_list.append(di_model_bef)
    return result


#   青海省黄南藏族自治州 (Huangnan Tibetan Autonomous Prefecture, Qinghai)
def policy_huangnanlist_callback(callmodel: CallBackModel[PolicyListModel]) -> DealModel:
    """List-page handler for Huangnan prefecture (Qinghai).

    The list endpoint returns JSON. On the first page, schedules all list
    pages; on every page, extracts article links from the ``value`` array
    and queues them for the article stage.
    """
    result = DealModel()
    para_dicts = callmodel.para_dicts
    task_info = callmodel.redis_all.parse_dict["1_1"].task_info
    info_dicts = {"task_name": callmodel.sql_model.task_name,
                  "task_tag": callmodel.sql_model.task_tag,
                  "task_tag_next": task_info.task_tag_next}
    if "1_1" in para_dicts["data"]:
        html_json = json.loads(para_dicts["data"]["1_1"]['html'])
        total_page = html_json['pagenum']
        page_index = int(callmodel.sql_model.page_index)
        if page_index == 1:
            sql_dict = callmodel.sql_model.dict()
            di_model_bef = DealInsertModel()
            di_model_bef.insert_pre = CoreSqlValue.insert_ig_it
            sql_dict = deal_sql_dict(sql_dict)
            for page in range(1, total_page + 1):
                sql_dict["page"] = total_page
                sql_dict["page_index"] = page
                sql_dict["list_json"] = callmodel.sql_model.list_json
                di_model_bef.lists.append(sql_dict.copy())
            result.befor_dicts.insert.append(di_model_bef)
        di_model_next = DealInsertModel()
        di_model_next.insert_pre = CoreSqlValue.insert_ig_it

        li_list = html_json['value']
        for li in li_list:
            temp = info_dicts.copy()
            temp["task_tag"] = temp["task_tag_next"]
            del temp["task_tag_next"]
            article_json = dict()
            href = li['navigationUrl']
            base_url = f'http://www.huangnan.gov.cn'
            if 'http' in href:
                url = href
            else:
                url = base_url + href
            # rawid is the article filename without its extension; raw
            # string avoids the invalid "\." escape warning.
            rawid = re.findall(r'(.*?)\.', url.split('/')[-1])[0]
            temp["rawid"] = rawid
            temp["sub_db_id"] = '99653'
            article_json["url"] = url
            article_json["title"] = li['title']
            article_json["pub_date"] = li['adddate']
            temp["article_json"] = json.dumps(article_json, ensure_ascii=False)
            di_model_next.lists.append(temp)
        result.next_dicts.insert.append(di_model_next)

    return result


def policy_huangnanarticle_callback(callmodel: CallBackModel[PolicyArticleModel]) -> DealModel:
    """No-op article handler for Huangnan; all parsing happens in the ETL step."""
    return DealModel()


def policy_huangnanarticle_etl_callback(callmodel) -> EtlDealModel:
    """ETL for Huangnan prefecture (Qinghai) policy article pages.

    Parses title/metadata and the fulltext block out of the crawled HTML,
    stages rows for the ``policy_latest`` and ``policy_fulltext_latest``
    tables, and writes attachment info back onto the source crawl row.

    Raises:
        Exception: when no fulltext container is found in the page.
    """
    result = EtlDealModel()
    save_data = list()

    html = callmodel.para_dicts['data']['1_1']['html']
    article_json = json.loads(callmodel.sql_model.article_json)
    provider_url = article_json['url']
    pub_date = clean_pubdate(article_json['pub_date'])
    pub_year = pub_date[:4]
    res = Selector(text=html)

    # Prefer the title rendered on the article page; fall back to the
    # title that was captured on the list page.
    title = ''.join(res.xpath('//h3[@class="my-3 title"]//text()|//div[@class="title"]//text()').extract()).strip()
    if not title:
        title = article_json['title'].strip()
    pub_no = ''.join(res.xpath('//div[@class="indexBox"]//b[contains(text(),"发文字号")]/following::td[1]/text()').extract()).strip()
    index_no = ''.join(res.xpath('//div[@class="indexBox"]//b[contains(text(),"索 引 号")]/following::td[1]/text()').extract()).strip()
    subject = ''.join(res.xpath('//div[@class="indexBox"]//b[contains(text(),"主题分类")]/following::td[1]/text()').extract()).strip()
    written_date = ''.join(res.xpath('//div[@class="indexBox"]//b[contains(text(),"成文日期")]/following::td[1]/text()').extract()).strip()
    organ = ''.join(res.xpath('//div[@class="indexBox"]//b[contains(text(),"发文机关")]/following::td[1]/text()').extract()).strip()
    # Pages abbreviate the issuing organ ("州..."); prefix the prefecture name.
    if organ.startswith('州'):
        organ = '青海省黄南藏族自治' + organ

    fulltext_xpath = '//div[contains(@class,"justify-content-center")]|//div[@class="cont"]'
    fulltext = res.xpath(fulltext_xpath).extract_first()
    if not fulltext:
        # Fail loudly with context so the broken page can be located.
        raise Exception(f'fulltext not found: {provider_url}')

    down_date_str = time.strftime('%Y%m%d_%H%M%S', time.localtime())
    sub_db_id = '99653'
    rawid = callmodel.sql_model.rawid
    lngid = BaseLngid().GetLngid(sub_db_id, rawid)
    product = "HUANGNAN"
    zt_provider = "huangnangovpolicy"
    data = init_data(rawid, lngid, sub_db_id, down_date_str, product, zt_provider)

    data['title'] = title
    data['provider_url'] = provider_url
    data['pub_date'] = pub_date
    data['pub_year'] = pub_year
    data['pub_no'] = pub_no
    data['organ'] = organ
    data['index_no'] = index_no
    data['written_date'] = clean_pubdate(written_date)
    data['subject'] = subject

    save_data.append({'table': 'policy_latest', 'data': data})
    full_text_data = init_full_text_data(lngid, sub_db_id, down_date_str, fulltext, pub_year)
    save_data.append({'table': 'policy_fulltext_latest', 'data': full_text_data})
    result.save_data = save_data

    # Record attachment info (if any) back onto the crawl row.
    file_info = get_file_info(data, res, f'({fulltext_xpath})')
    di_model_bef = DealUpdateModel()
    di_model_bef.update.update({"other_dicts": json.dumps(file_info, ensure_ascii=False) if file_info else "{}"})
    di_model_bef.where.update({"rawid": callmodel.sql_model.rawid, "task_tag": callmodel.sql_model.task_tag,
                               "task_name": callmodel.sql_model.task_name})
    result.befor_dicts.update_list.append(di_model_bef)
    return result


#   青海省海南藏族自治州 (Hainan Tibetan Autonomous Prefecture, Qinghai)
def policy_hainanzhoulist_callback(callmodel: CallBackModel[PolicyListModel]) -> DealModel:
    """List-page handler for Hainan prefecture (Qinghai).

    On the first page, schedules the remaining list pages; on every page,
    extracts article links and queues them for the article stage.
    """
    result = DealModel()
    para_dicts = callmodel.para_dicts
    task_info = callmodel.redis_all.parse_dict["1_1"].task_info
    info_dicts = {"task_name": callmodel.sql_model.task_name,
                  "task_tag": callmodel.sql_model.task_tag,
                  "task_tag_next": task_info.task_tag_next}
    if "1_1" in para_dicts["data"]:
        # Two templates are in use; try both pagination markers.
        # Raw strings avoid invalid "\(" / "\d" escape warnings.
        max_count = re.findall(r'createPageHTML\((\d+)', para_dicts["data"]["1_1"]['html'])
        if not max_count:
            max_count = re.findall(r'pagenav_tail.*?index_(\d+)', para_dicts["data"]["1_1"]['html'])
        total_page = int(max_count[0]) if max_count else 1
        page_index = int(callmodel.sql_model.page_index)
        if page_index == 0:
            sql_dict = callmodel.sql_model.dict()
            di_model_bef = DealInsertModel()
            di_model_bef.insert_pre = CoreSqlValue.insert_ig_it
            sql_dict = deal_sql_dict(sql_dict)
            list_json = json.loads(callmodel.sql_model.list_json)
            for page in range(page_index + 1, total_page):
                sql_dict["page"] = total_page
                sql_dict["page_index"] = page
                dic = {"page_info": f"{list_json['page_info']}_{page}"}
                sql_dict["list_json"] = json.dumps(dic, ensure_ascii=False)
                di_model_bef.lists.append(sql_dict.copy())
            result.befor_dicts.insert.append(di_model_bef)
        di_model_next = DealInsertModel()
        di_model_next.insert_pre = CoreSqlValue.insert_ig_it
        res = Selector(text=para_dicts["data"]["1_1"]['html'])
        li_list = res.xpath('//ul[@class="xy-gl-list"]/li|//div[@class="zfxxgk_zd2"]/ul/li')
        for li in li_list:
            temp = info_dicts.copy()
            temp["task_tag"] = temp["task_tag_next"]
            del temp["task_tag_next"]
            article_json = dict()
            href = li.xpath('a/@href').extract_first()
            # Guard against rows without a link (urljoin raises on None).
            if not href:
                continue
            base_url = f'http://www.hainanzhou.gov.cn/{callmodel.sql_model.list_rawid}/index.html'
            url = parse.urljoin(base_url, href)
            if 'htm' not in url:
                continue
            # rawid is the article filename without its extension.
            rawid = re.findall(r'(.*?)\.', url.split('/')[-1])[0]
            temp["rawid"] = rawid
            temp["sub_db_id"] = '99654'
            article_json["url"] = url
            article_json["title"] = li.xpath('a/text()').extract_first().strip()
            article_json["pub_date"] = li.xpath('span/text()|b/text()').extract_first().strip()
            temp["article_json"] = json.dumps(article_json, ensure_ascii=False)
            di_model_next.lists.append(temp)
        result.next_dicts.insert.append(di_model_next)

    return result


def policy_hainanzhouarticle_callback(callmodel: CallBackModel[PolicyArticleModel]) -> DealModel:
    """No-op article handler for Hainan prefecture; parsing happens in the ETL step."""
    return DealModel()


def policy_hainanzhouarticle_etl_callback(callmodel) -> EtlDealModel:
    """ETL for Hainan prefecture (Qinghai) policy article pages.

    Parses title/metadata and the fulltext block out of the crawled HTML,
    stages rows for the ``policy_latest`` and ``policy_fulltext_latest``
    tables, and writes attachment info back onto the source crawl row.

    Raises:
        Exception: when no fulltext container is found in the page.
    """
    result = EtlDealModel()
    save_data = list()

    html = callmodel.para_dicts['data']['1_1']['html']
    article_json = json.loads(callmodel.sql_model.article_json)
    provider_url = article_json['url']
    pub_date = clean_pubdate(article_json['pub_date'])
    pub_year = pub_date[:4]
    res = Selector(text=html)

    # Prefer the title rendered on the article page; fall back to the
    # list-page title when it is missing or spans multiple lines.
    title = ''.join(res.xpath('//h1[@class="tit"]//text()|//div[@class="scroll_main"]//h3//text()').extract()).strip()
    if '\n' in title or not title:
        title = article_json['title'].strip()
    pub_no = ''.join(res.xpath('//div[@class="xlwz"]//span[contains(text(),"发文字号")]/following::span[1]/text()').extract()).strip()
    index_no = ''.join(res.xpath('//div[@class="xlwz"]//span[contains(text(),"索")]/following::span[1]/text()').extract()).strip()
    subject = ''.join(res.xpath('//div[@class="xlwz"]//span[contains(text(),"主题分类")]/following::span[1]/text()').extract()).strip()
    impl_date = ''.join(res.xpath('//div[@class="xlwz"]//span[contains(text(),"生效日期")]/following::span[1]/text()').extract()).strip()
    organ = ''.join(res.xpath('//div[@class="xlwz"]//span[contains(text(),"发布机构")]/following::span[1]/text()').extract()).strip()
    # Pages abbreviate the issuing organ ("州..."); prefix the prefecture name.
    if organ.startswith('州'):
        organ = '青海省海南藏族自治' + organ

    fulltext_xpath = '//div[@id="zoom"]|//div[contains(@class,"TRS_Editor")]'
    fulltext = res.xpath(fulltext_xpath).extract_first()
    if not fulltext:
        # Fail loudly with context so the broken page can be located.
        raise Exception(f'fulltext not found: {provider_url}')

    down_date_str = time.strftime('%Y%m%d_%H%M%S', time.localtime())
    sub_db_id = '99654'
    rawid = callmodel.sql_model.rawid
    lngid = BaseLngid().GetLngid(sub_db_id, rawid)
    product = "HAINANZHOU"
    zt_provider = "hainanzhougovpolicy"
    data = init_data(rawid, lngid, sub_db_id, down_date_str, product, zt_provider)

    data['title'] = title
    data['provider_url'] = provider_url
    data['pub_date'] = pub_date
    data['pub_year'] = pub_year
    data['pub_no'] = pub_no
    data['organ'] = organ
    data['index_no'] = index_no
    data['impl_date'] = clean_pubdate(impl_date)
    data['subject'] = subject

    save_data.append({'table': 'policy_latest', 'data': data})
    full_text_data = init_full_text_data(lngid, sub_db_id, down_date_str, fulltext, pub_year)
    save_data.append({'table': 'policy_fulltext_latest', 'data': full_text_data})
    result.save_data = save_data

    # Record attachment info (if any) back onto the crawl row.
    file_info = get_file_info(data, res, f'({fulltext_xpath})')
    di_model_bef = DealUpdateModel()
    di_model_bef.update.update({"other_dicts": json.dumps(file_info, ensure_ascii=False) if file_info else "{}"})
    di_model_bef.where.update({"rawid": callmodel.sql_model.rawid, "task_tag": callmodel.sql_model.task_tag,
                               "task_name": callmodel.sql_model.task_name})
    result.befor_dicts.update_list.append(di_model_bef)
    return result


#   青海省果洛藏族自治州 (Golog Tibetan Autonomous Prefecture, Qinghai)
def policy_guoluolist_callback(callmodel: CallBackModel[PolicyListModel]) -> DealModel:
    """List-page handler for Guoluo (Golog) prefecture (Qinghai).

    On the first page, schedules the remaining list pages; on every page,
    extracts article links and queues them for the article stage.
    """
    result = DealModel()
    para_dicts = callmodel.para_dicts
    task_info = callmodel.redis_all.parse_dict["1_1"].task_info
    info_dicts = {"task_name": callmodel.sql_model.task_name,
                  "task_tag": callmodel.sql_model.task_tag,
                  "task_tag_next": task_info.task_tag_next}
    if "1_1" in para_dicts["data"]:
        # Raw string avoids the invalid "\d" escape warning.
        max_count = re.findall(r'>共(\d+)页', para_dicts["data"]["1_1"]['html'])
        total_page = int(max_count[0]) if max_count else 1
        page_index = int(callmodel.sql_model.page_index)
        if page_index == 0:
            sql_dict = callmodel.sql_model.dict()
            di_model_bef = DealInsertModel()
            di_model_bef.insert_pre = CoreSqlValue.insert_ig_it
            sql_dict = deal_sql_dict(sql_dict)
            list_json = json.loads(callmodel.sql_model.list_json)
            for page in range(page_index + 1, total_page):
                sql_dict["page"] = total_page
                sql_dict["page_index"] = page
                dic = {"page_info": f"{list_json['page_info']}-{page}"}
                sql_dict["list_json"] = json.dumps(dic, ensure_ascii=False)
                di_model_bef.lists.append(sql_dict.copy())
            result.befor_dicts.insert.append(di_model_bef)
        di_model_next = DealInsertModel()
        di_model_next.insert_pre = CoreSqlValue.insert_ig_it
        res = Selector(text=para_dicts["data"]["1_1"]['html'])
        li_list = res.xpath('//table[@id="table592"]//table//table/tr')
        for li in li_list:
            temp = info_dicts.copy()
            temp["task_tag"] = temp["task_tag_next"]
            del temp["task_tag_next"]
            article_json = dict()
            href = li.xpath('td[1]/li/a/@href').extract_first()
            if not href:
                continue
            base_url = f'http://www.guoluo.gov.cn/{callmodel.sql_model.list_rawid}/index.html'
            url = parse.urljoin(base_url, href)
            if 'htm' not in url:
                continue
            # rawid is the article filename without its extension.
            rawid = re.findall(r'(.*?)\.', url.split('/')[-1])[0]
            temp["rawid"] = rawid
            temp["sub_db_id"] = '99655'
            article_json["url"] = url
            article_json["title"] = li.xpath('td[1]/li/a/text()').extract_first().strip()
            article_json["pub_date"] = li.xpath('td[2]/text()').extract_first().strip()
            temp["article_json"] = json.dumps(article_json, ensure_ascii=False)
            di_model_next.lists.append(temp)
        result.next_dicts.insert.append(di_model_next)

    return result


def policy_guoluoarticle_callback(callmodel: CallBackModel[PolicyArticleModel]) -> DealModel:
    """No-op article handler for Guoluo; all parsing happens in the ETL step."""
    return DealModel()


def policy_guoluoarticle_etl_callback(callmodel) -> EtlDealModel:
    """ETL for Guoluo (Golog) prefecture (Qinghai) policy article pages.

    Parses title/metadata and the fulltext block out of the crawled HTML,
    stages rows for the ``policy_latest`` and ``policy_fulltext_latest``
    tables, and writes attachment info back onto the source crawl row.

    Raises:
        Exception: when no fulltext container is found in the page.
    """
    result = EtlDealModel()
    save_data = list()

    html = callmodel.para_dicts['data']['1_1']['html']
    article_json = json.loads(callmodel.sql_model.article_json)
    provider_url = article_json['url']
    pub_date = clean_pubdate(article_json['pub_date'])
    pub_year = pub_date[:4]
    res = Selector(text=html)

    # Prefer the title rendered on the article page; fall back to the
    # title that was captured on the list page.
    title = ''.join(res.xpath('//span[@class="fontbt"]//text()').extract()).strip()
    if not title:
        title = article_json['title'].strip()
    pub_no = ''.join(res.xpath('//td[contains(text(),"发文字号：")]/following::td[1]/text()').extract()).strip()
    index_no = ''.join(res.xpath('//td[contains(text(),"索引号：")]/following::td[1]/text()').extract()).strip()
    subject = ''.join(res.xpath('//td[contains(text(),"主题分类：")]/following::td[1]/text()').extract()).strip()
    written_date = ''.join(res.xpath('//td[contains(text(),"成文日期：")]/following::td[1]/text()').extract()).strip()
    organ = ''.join(res.xpath('//td[contains(text(),"发文机关：")]/following::td[1]/text()').extract()).strip()
    # Pages abbreviate the issuing organ ("州..."); prefix the prefecture name.
    if organ.startswith('州'):
        organ = '青海省果洛藏族自治' + organ

    fulltext_xpath = '//font[@id="Zoom"]'
    fulltext = res.xpath(fulltext_xpath).extract_first()
    if not fulltext:
        # Fail loudly with context so the broken page can be located.
        raise Exception(f'fulltext not found: {provider_url}')

    down_date_str = time.strftime('%Y%m%d_%H%M%S', time.localtime())
    sub_db_id = '99655'
    rawid = callmodel.sql_model.rawid
    lngid = BaseLngid().GetLngid(sub_db_id, rawid)
    product = "GUOLUO"
    zt_provider = "guoluogovpolicy"
    data = init_data(rawid, lngid, sub_db_id, down_date_str, product, zt_provider)

    data['title'] = title
    data['provider_url'] = provider_url
    data['pub_date'] = pub_date
    data['pub_year'] = pub_year
    data['pub_no'] = pub_no
    data['organ'] = organ
    data['index_no'] = index_no
    data['written_date'] = clean_pubdate(written_date)
    data['subject'] = subject

    save_data.append({'table': 'policy_latest', 'data': data})
    full_text_data = init_full_text_data(lngid, sub_db_id, down_date_str, fulltext, pub_year)
    save_data.append({'table': 'policy_fulltext_latest', 'data': full_text_data})
    result.save_data = save_data

    # Record attachment info (if any) back onto the crawl row.
    file_info = get_file_info(data, res, f'({fulltext_xpath})')
    di_model_bef = DealUpdateModel()
    di_model_bef.update.update({"other_dicts": json.dumps(file_info, ensure_ascii=False) if file_info else "{}"})
    di_model_bef.where.update({"rawid": callmodel.sql_model.rawid, "task_tag": callmodel.sql_model.task_tag,
                               "task_name": callmodel.sql_model.task_name})
    result.befor_dicts.update_list.append(di_model_bef)
    return result


#   青海省玉树藏族自治州 (Yushu Tibetan Autonomous Prefecture, Qinghai)
def policy_yushuzhoulist_callback(callmodel: CallBackModel[PolicyListModel]) -> DealModel:
    """List-page handler for Yushu prefecture (Qinghai).

    The site uses two templates, distinguished by ``'html/'`` in the list
    rawid: static ``List-N.html`` pages vs a ``&pages=N`` query endpoint.
    On the first page, schedules the remaining list pages; on every page,
    extracts article links and queues them for the article stage.
    """
    result = DealModel()
    para_dicts = callmodel.para_dicts
    task_info = callmodel.redis_all.parse_dict["1_1"].task_info
    info_dicts = {"task_name": callmodel.sql_model.task_name,
                  "task_tag": callmodel.sql_model.task_tag,
                  "task_tag_next": task_info.task_tag_next}
    if "1_1" in para_dicts["data"]:
        # Raw string avoids the invalid "\d" escape warning.
        max_count = re.findall(r'>共(\d+)页', para_dicts["data"]["1_1"]['html'])
        total_page = int(max_count[0]) if max_count else 1
        page_index = int(callmodel.sql_model.page_index)
        if page_index == 0:
            sql_dict = callmodel.sql_model.dict()
            di_model_bef = DealInsertModel()
            di_model_bef.insert_pre = CoreSqlValue.insert_ig_it
            sql_dict = deal_sql_dict(sql_dict)
            for page in range(page_index + 1, total_page):
                sql_dict["page"] = total_page
                sql_dict["page_index"] = page
                if 'html/' in callmodel.sql_model.list_rawid:
                    dic = {"page_info": f"List-{page}.html"}
                else:
                    dic = {"page_info": f"&pages={page + 1}"}
                sql_dict["list_json"] = json.dumps(dic, ensure_ascii=False)
                di_model_bef.lists.append(sql_dict.copy())
            result.befor_dicts.insert.append(di_model_bef)
        di_model_next = DealInsertModel()
        di_model_next.insert_pre = CoreSqlValue.insert_ig_it
        res = Selector(text=para_dicts["data"]["1_1"]['html'])
        if 'html/' in callmodel.sql_model.list_rawid:
            li_list = res.xpath('//span[@class="font"]/table/tr')
            for li in li_list:
                temp = info_dicts.copy()
                temp["task_tag"] = temp["task_tag_next"]
                del temp["task_tag_next"]
                article_json = dict()
                href = li.xpath('td[1]/li/a/@href').extract_first()
                if not href:
                    continue
                base_url = f'http://www.yushuzhou.gov.cn/{callmodel.sql_model.list_rawid}List.html'
                url = parse.urljoin(base_url, href)
                if 'htm' not in url:
                    continue
                # rawid is the article filename without its extension.
                rawid = re.findall(r'(.*?)\.', url.split('/')[-1])[0]
                temp["rawid"] = rawid
                temp["sub_db_id"] = '99656'
                article_json["url"] = url
                article_json["title"] = li.xpath('td[1]/li/a/text()').extract_first().strip()
                article_json["pub_date"] = li.xpath('td[2]/text()').extract_first().strip()
                temp["article_json"] = json.dumps(article_json, ensure_ascii=False)
                di_model_next.lists.append(temp)
        else:
            li_list = res.xpath('//tr[@class="lxzd"]')
            for li in li_list:
                temp = info_dicts.copy()
                temp["task_tag"] = temp["task_tag_next"]
                del temp["task_tag_next"]
                article_json = dict()
                href = li.xpath('td[2]/a/@href').extract_first()
                # Guard against rows without a link (urljoin raises on None).
                if not href:
                    continue
                base_url = f'http://www.yushuzhou.gov.cn/{callmodel.sql_model.list_rawid}'
                url = parse.urljoin(base_url, href)
                if 'htm' not in url:
                    continue
                rawid = re.findall(r'(.*?)\.', url.split('/')[-1])[0]
                temp["rawid"] = rawid
                temp["sub_db_id"] = '99656'
                article_json["url"] = url
                article_json["title"] = li.xpath('td[2]/a/text()').extract_first().strip()
                article_json["pub_date"] = li.xpath('td[5]/text()').extract_first().strip()
                temp["article_json"] = json.dumps(article_json, ensure_ascii=False)
                di_model_next.lists.append(temp)
        result.next_dicts.insert.append(di_model_next)

    return result


def policy_yushuzhouarticle_callback(callmodel: CallBackModel[PolicyArticleModel]) -> DealModel:
    """Article-stage callback for Yushu prefecture: no per-article work here, returns an empty DealModel."""
    return DealModel()


def policy_yushuzhouarticle_etl_callback(callmodel) -> EtlDealModel:
    """ETL callback for Yushu prefecture (青海省玉树藏族自治州) policy articles.

    Parses the article page fetched in step "1_1", extracts metadata
    (title, document number, index number, subject, written date, issuing
    organ) and the full text, queues rows for the ``policy_latest`` and
    ``policy_fulltext_latest`` tables, and writes attachment info back to
    the source row via ``other_dicts``.

    Raises:
        Exception: if the full-text container cannot be located in the page.
    """
    result = EtlDealModel()
    save_data = list()

    html = callmodel.para_dicts['data']['1_1']['html']
    article_json = json.loads(callmodel.sql_model.article_json)
    provider_url = article_json['url']
    pub_date = clean_pubdate(article_json['pub_date'])
    pub_year = pub_date[:4]
    res = Selector(text=html)

    # Title: prefer the on-page heading variants, fall back to the list-page title.
    title = ''.join(res.xpath('//table[@class="input7"]//td[@class="hei22"]//text()').extract()).strip()
    if not title:
        title = ''.join(res.xpath('//td[@class="hei22"]//text()').extract()).strip()
    if not title:
        title = article_json['title'].strip()
    pub_no = ''.join(res.xpath('//td[contains(text(),"文") and contains(text(),"号:")]/following::td[1]/text()').extract()).strip()
    index_no = ''.join(res.xpath('//td[contains(text(),"索") and contains(text(),"号:")]/following::td[1]/text()').extract()).strip()
    subject = ''.join(res.xpath('//td[contains(text(),"主题分类:")]/following::td[1]/text()').extract()).strip()
    written_date = ''.join(res.xpath('//td[contains(text(),"成文日期：")]/following::td[1]/text()').extract()).strip()
    organ = ''.join(res.xpath('//td[contains(text(),"发文机关：")]/following::td[1]/text()').extract()).strip()
    # The site abbreviates the organ name ("州..."); restore the full prefix.
    if organ.startswith('州'):
        organ = '青海省玉树藏族自治' + organ

    fulltext_xpath = '//font[@id="Zoom"]'
    fulltext = res.xpath(fulltext_xpath).extract_first()
    if not fulltext:
        # Carry the URL in the message so the failing page can be found.
        raise Exception(f'fulltext not found for {provider_url}')

    down_date_str = time.strftime('%Y%m%d_%H%M%S', time.localtime())
    sub_db_id = '99656'
    rawid = callmodel.sql_model.rawid
    lngid = BaseLngid().GetLngid(sub_db_id, rawid)
    product = "YUSHUZHOU"
    zt_provider = "yushuzhougovpolicy"
    data = init_data(rawid, lngid, sub_db_id, down_date_str, product, zt_provider)
    print(lngid)

    data['title'] = title
    data['provider_url'] = provider_url
    data['pub_date'] = pub_date
    data['pub_year'] = pub_year
    data['pub_no'] = pub_no
    data['organ'] = organ
    data['index_no'] = index_no
    data['written_date'] = clean_pubdate(written_date)
    data['subject'] = subject

    save_data.append({'table': 'policy_latest', 'data': data})
    full_text_data = init_full_text_data(lngid, sub_db_id, down_date_str, fulltext, pub_year)
    save_data.append({'table': 'policy_fulltext_latest', 'data': full_text_data})
    result.save_data = save_data

    # Attachment info (if any) is stored back onto the source row.
    file_info = get_file_info(data, res, f'({fulltext_xpath})')
    di_model_bef = DealUpdateModel()
    if file_info:
        di_model_bef.update.update({"other_dicts": json.dumps(file_info, ensure_ascii=False)})
    else:
        di_model_bef.update.update({"other_dicts": "{}"})
    di_model_bef.where.update({"rawid": callmodel.sql_model.rawid, "task_tag": callmodel.sql_model.task_tag,
                               "task_name": callmodel.sql_model.task_name})
    result.befor_dicts.update_list.append(di_model_bef)
    return result


#   青海省海西蒙古族藏族自治州
def policy_haixilist_callback(callmodel: CallBackModel[PolicyListModel]) -> DealModel:
    """List-page callback for Haixi prefecture (青海省海西蒙古族藏族自治州).

    On the first page (page_index == 0) the total page count is read from
    the "p_next p_fun" pager link and the remaining list pages are
    scheduled; on every page the article links are extracted and pushed
    to the next task stage.
    """
    result = DealModel()
    para_dicts = callmodel.para_dicts
    task_info = callmodel.redis_all.parse_dict["1_1"].task_info
    info_dicts = {"task_name": callmodel.sql_model.task_name,
                  "task_tag": callmodel.sql_model.task_tag,
                  "task_tag_next": task_info.task_tag_next}
    if "1_1" in para_dicts["data"]:
        max_count = re.findall(r'p_next p_fun.*?/(\d+)', para_dicts["data"]["1_1"]['html'])
        max_count = int(max_count[0]) if max_count else 1
        total_page = max_count + 1
        page_index = int(callmodel.sql_model.page_index)
        if page_index == 0:
            sql_dict = callmodel.sql_model.dict()
            di_model_bef = DealInsertModel()
            di_model_bef.insert_pre = CoreSqlValue.insert_ig_it
            sql_dict = deal_sql_dict(sql_dict)
            list_json = json.loads(callmodel.sql_model.list_json)
            for page in range(page_index + 1, total_page):
                sql_dict["page"] = total_page
                sql_dict["page_index"] = page
                # Pages on this site count downwards: page 1 is "/<total-1>.htm".
                dic = {"page_info": f"/{total_page - page}.htm"}
                sql_dict["list_json"] = json.dumps(dic, ensure_ascii=False)
                di_model_bef.lists.append(sql_dict.copy())
            result.befor_dicts.insert.append(di_model_bef)
        di_model_next = DealInsertModel()
        di_model_next.insert_pre = CoreSqlValue.insert_ig_it
        res = Selector(text=para_dicts["data"]["1_1"]['html'])
        if page_index == 0:
            base_url = f'http://www.haixi.gov.cn/{callmodel.sql_model.list_rawid}.htm'
        else:
            base_url = f'http://www.haixi.gov.cn/{callmodel.sql_model.list_rawid}/1.htm'
        if 'zfxxgk/zfxsgk/zc/xzgfxwj' in callmodel.sql_model.list_rawid:
            # Regulatory-document channel: items live in a table layout.
            li_list = res.xpath('//tr[@class="tml"]')
            for li in li_list:
                temp = info_dicts.copy()
                temp["task_tag"] = temp["task_tag_next"]
                del temp["task_tag_next"]
                article_json = dict()
                href = li.xpath('td[2]/span/a/@href').extract_first()
                if not href:
                    # Rows without an anchor (e.g. header rows) would otherwise
                    # crash on the title extraction below.
                    continue
                url = parse.urljoin(base_url, href)
                if 'htm' not in url:
                    continue
                rawid = re.findall(r'(.*?)\.', url.split('/')[-1])[0]
                temp["rawid"] = rawid
                temp["sub_db_id"] = '99657'
                article_json["url"] = url
                article_json["title"] = li.xpath('td[2]/span/a/text()').extract_first().strip()
                # This channel shows no list-page date; the ETL step fills it in.
                article_json["pub_date"] = ''
                temp["article_json"] = json.dumps(article_json, ensure_ascii=False)
                di_model_next.lists.append(temp)
        else:
            li_list = res.xpath('//ul[@class="lb-news"]/li|//tr[contains(@id,"line")]')
            for li in li_list:
                temp = info_dicts.copy()
                temp["task_tag"] = temp["task_tag_next"]
                del temp["task_tag_next"]
                article_json = dict()
                href = li.xpath('td[1]/a/@href|a/@href').extract_first()
                if not href:
                    continue
                url = parse.urljoin(base_url, href)
                if 'htm' not in url:
                    continue
                rawid = re.findall(r'(.*?)\.', url.split('/')[-1])[0]
                temp["rawid"] = rawid
                temp["sub_db_id"] = '99657'
                article_json["url"] = url
                article_json["title"] = li.xpath('td[1]/a/text()|a/text()').extract_first().strip()
                article_json["pub_date"] = li.xpath('td[2]/text()|span/text()').extract_first().strip()
                temp["article_json"] = json.dumps(article_json, ensure_ascii=False)
                di_model_next.lists.append(temp)
        result.next_dicts.insert.append(di_model_next)

    return result


def policy_haixiarticle_callback(callmodel: CallBackModel[PolicyArticleModel]) -> DealModel:
    """Article-stage callback for Haixi prefecture: no per-article work here, returns an empty DealModel."""
    return DealModel()


def policy_haixiarticle_etl_callback(callmodel) -> EtlDealModel:
    """ETL callback for Haixi prefecture (青海省海西蒙古族藏族自治州) policy articles.

    Extracts title, document number, index number and issuing organ from
    the article HTML, queues metadata/full-text rows, and writes
    attachment info (collected from both the article body and the
    "zmhdlist" table) back to the source row.

    Raises:
        Exception: if the full-text container cannot be located in the page.
    """
    result = EtlDealModel()
    save_data = list()

    html = callmodel.para_dicts['data']['1_1']['html']
    article_json = json.loads(callmodel.sql_model.article_json)
    provider_url = article_json['url']
    pub_date = clean_pubdate(article_json['pub_date'])
    pub_year = pub_date[:4]
    res = Selector(text=html)

    # Title: try the page's several layout variants, then the list-page title.
    title = ''.join(res.xpath('//h2[@class="contit"]//text()|//td[@class="titzw"]//text()|//div[contains(@class,"article-tt")]//text()').extract()).strip()
    if not title:
        title = article_json['title'].strip()
    pub_no = ''.join(res.xpath('//div[@class="sefg"]//td[contains(text(),"文") and contains(text(),"号")]/following::td[1]/text()').extract()).strip()
    index_no = ''.join(res.xpath('//div[@class="sefg"]//td[contains(text(),"索") and contains(text(),"号")]/following::td[1]/text()').extract()).strip()
    organ = ''.join(res.xpath('//div[@class="sefg"]//td[contains(text(),"发布机构")]/following::td[1]/text()').extract()).strip()
    # The site abbreviates the organ name ("州..."); restore the full prefix.
    if organ.startswith('州'):
        organ = '青海省海西蒙古族藏族自治' + organ

    fulltext_xpath = '//div[contains(@id,"vsb_content")]'
    fulltext = res.xpath(fulltext_xpath).extract_first()
    if not fulltext:
        # Carry the URL in the message so the failing page can be found.
        raise Exception(f'fulltext not found for {provider_url}')

    down_date_str = time.strftime('%Y%m%d_%H%M%S', time.localtime())
    sub_db_id = '99657'
    rawid = callmodel.sql_model.rawid
    lngid = BaseLngid().GetLngid(sub_db_id, rawid)
    product = "HAIXI"
    zt_provider = "haixigovpolicy"
    data = init_data(rawid, lngid, sub_db_id, down_date_str, product, zt_provider)
    print(lngid)

    data['title'] = title
    data['provider_url'] = provider_url
    data['pub_date'] = pub_date
    data['pub_year'] = pub_year
    data['pub_no'] = pub_no
    data['organ'] = organ
    data['index_no'] = index_no

    save_data.append({'table': 'policy_latest', 'data': data})
    full_text_data = init_full_text_data(lngid, sub_db_id, down_date_str, fulltext, pub_year)
    save_data.append({'table': 'policy_fulltext_latest', 'data': full_text_data})
    result.save_data = save_data

    # Attachments can appear in the body and in an extra list table; merge both.
    file_info1 = get_file_info(data, res, f'({fulltext_xpath})')
    file_info2 = get_file_info(data, res, f'(//div[@class="zmhdlist"]/table)')
    file_info = file_info1 + file_info2
    di_model_bef = DealUpdateModel()
    if file_info:
        di_model_bef.update.update({"other_dicts": json.dumps(file_info, ensure_ascii=False)})
    else:
        di_model_bef.update.update({"other_dicts": "{}"})
    di_model_bef.where.update({"rawid": callmodel.sql_model.rawid, "task_tag": callmodel.sql_model.task_tag,
                               "task_name": callmodel.sql_model.task_name})
    result.befor_dicts.update_list.append(di_model_bef)
    return result


#   甘肃省嘉峪关市
def policy_jyglist_callback(callmodel: CallBackModel[PolicyListModel]) -> DealModel:
    """List-page callback for Jiayuguan (甘肃省嘉峪关市).

    The jpaas endpoint returns JSON whose ``data.html`` field contains the
    rendered list fragment, paginated 15 items per page. Page one derives
    the page count from the escaped count attribute and schedules the
    remaining pages; every page extracts article links for the next stage.
    """
    result = DealModel()
    para_dicts = callmodel.para_dicts
    task_info = callmodel.redis_all.parse_dict["1_1"].task_info
    info_dicts = {"task_name": callmodel.sql_model.task_name,
                  "task_tag": callmodel.sql_model.task_tag,
                  "task_tag_next": task_info.task_tag_next}
    if "1_1" in para_dicts["data"]:
        # The count attribute is backslash-escaped inside the raw JSON
        # payload, so the pattern matches a literal backslash before the quote.
        max_count = re.findall(r'count=\\"(\d+)', para_dicts["data"]["1_1"]['html'])
        max_count = int(max_count[0]) if max_count else 1
        total_page = math.ceil(max_count / 15)
        page_index = int(callmodel.sql_model.page_index)
        if page_index == 1:
            sql_dict = callmodel.sql_model.dict()
            di_model_bef = DealInsertModel()
            di_model_bef.insert_pre = CoreSqlValue.insert_ig_it
            sql_dict = deal_sql_dict(sql_dict)
            for page in range(page_index + 1, total_page + 1):
                sql_dict["page"] = total_page
                sql_dict["page_index"] = page
                # The list_json request payload is identical for every page.
                sql_dict["list_json"] = callmodel.sql_model.list_json
                di_model_bef.lists.append(sql_dict.copy())
            result.befor_dicts.insert.append(di_model_bef)
        di_model_next = DealInsertModel()
        di_model_next.insert_pre = CoreSqlValue.insert_ig_it
        html_json = json.loads(para_dicts["data"]["1_1"]['html'])
        res = Selector(text=html_json['data']['html'])
        li_list = res.xpath('//div[@class="page-content"]/ul/li')
        for li in li_list:
            temp = info_dicts.copy()
            temp["task_tag"] = temp["task_tag_next"]
            del temp["task_tag_next"]
            article_json = dict()
            href = li.xpath('a/@href').extract_first()
            if not href:
                # Items without an anchor cannot yield a link or title; skip.
                continue
            base_url = f'http://www.jyg.gov.cn/api-gateway/jpaas-publish-server/front/page/build/unit'
            url = parse.urljoin(base_url, href)
            if 'htm' not in url:
                continue
            rawid = re.findall(r'(.*?)\.', url.split('/')[-1])[0]
            temp["rawid"] = rawid
            temp["sub_db_id"] = '99669'
            article_json["url"] = url
            article_json["title"] = li.xpath('a/text()').extract_first().strip()
            article_json["pub_date"] = li.xpath('.//span/text()').extract_first().strip()
            temp["article_json"] = json.dumps(article_json, ensure_ascii=False)
            di_model_next.lists.append(temp)
        result.next_dicts.insert.append(di_model_next)

    return result


def policy_jygarticle_callback(callmodel: CallBackModel[PolicyArticleModel]) -> DealModel:
    """Article-stage callback for Jiayuguan: no per-article work here, returns an empty DealModel."""
    return DealModel()


def policy_jygarticle_etl_callback(callmodel) -> EtlDealModel:
    """ETL callback for Jiayuguan (甘肃省嘉峪关市) policy articles.

    Extracts the title (page heading, then the ArticleTitle meta tag,
    then the list-page title) and the full text, queues metadata and
    full-text rows, and writes attachment info back to the source row.

    Raises:
        Exception: if the full-text container cannot be located in the page.
    """
    result = EtlDealModel()
    save_data = list()

    html = callmodel.para_dicts['data']['1_1']['html']
    article_json = json.loads(callmodel.sql_model.article_json)
    provider_url = article_json['url']
    pub_date = clean_pubdate(article_json['pub_date'])
    pub_year = pub_date[:4]
    res = Selector(text=html)

    # Title fallback chain: on-page heading -> meta tag -> list-page title.
    title = ''.join(res.xpath('//div[@class="article-top"]//p[@class="text-tag canRead"]//text()').extract()).strip()
    if not title:
        title = ''.join(res.xpath('//meta[@name="ArticleTitle"][1]/@content').extract()).strip()
    if not title:
        title = article_json['title'].strip()

    fulltext_xpath = '//div[@class="article-conter"]'
    fulltext = res.xpath(fulltext_xpath).extract_first()
    if not fulltext:
        # Carry the URL in the message so the failing page can be found.
        raise Exception(f'fulltext not found for {provider_url}')

    down_date_str = time.strftime('%Y%m%d_%H%M%S', time.localtime())
    sub_db_id = '99669'
    rawid = callmodel.sql_model.rawid
    lngid = BaseLngid().GetLngid(sub_db_id, rawid)
    product = "JYG"
    zt_provider = "jyggovpolicy"
    data = init_data(rawid, lngid, sub_db_id, down_date_str, product, zt_provider)
    print(lngid)

    data['title'] = title
    data['provider_url'] = provider_url
    data['pub_date'] = pub_date
    data['pub_year'] = pub_year

    save_data.append({'table': 'policy_latest', 'data': data})
    full_text_data = init_full_text_data(lngid, sub_db_id, down_date_str, fulltext, pub_year)
    save_data.append({'table': 'policy_fulltext_latest', 'data': full_text_data})
    result.save_data = save_data

    # Attachment info (if any) is stored back onto the source row.
    file_info = get_file_info(data, res, f'({fulltext_xpath})')
    di_model_bef = DealUpdateModel()
    if file_info:
        di_model_bef.update.update({"other_dicts": json.dumps(file_info, ensure_ascii=False)})
    else:
        di_model_bef.update.update({"other_dicts": "{}"})
    di_model_bef.where.update({"rawid": callmodel.sql_model.rawid, "task_tag": callmodel.sql_model.task_tag,
                               "task_name": callmodel.sql_model.task_name})
    result.befor_dicts.update_list.append(di_model_bef)
    return result


#   甘肃省金昌市
def policy_jcslist_callback(callmodel: CallBackModel[PolicyListModel]) -> DealModel:
    """List-page callback for Jinchang (甘肃省金昌市), XML record feed.

    The feed exposes a <totalrecord> count at 25 records per page. On page
    one the remaining pages are scheduled three at a time as start/end
    record ranges; every page turns each <record> element into an article
    task for the next stage.
    """
    result = DealModel()
    para_dicts = callmodel.para_dicts
    task_info = callmodel.redis_all.parse_dict["1_1"].task_info
    info_dicts = {"task_name": callmodel.sql_model.task_name,
                  "task_tag": callmodel.sql_model.task_tag,
                  "task_tag_next": task_info.task_tag_next}
    if "1_1" in para_dicts["data"]:
        max_count = re.findall(r'<totalrecord>(\d+)</totalrecord>', para_dicts["data"]["1_1"]['html'])
        max_count = int(max_count[0]) if max_count else 1
        total_page = math.ceil(max_count / 25)
        page_index = int(callmodel.sql_model.page_index)
        if page_index == 1:
            sql_dict = callmodel.sql_model.dict()
            di_model_bef = DealInsertModel()
            di_model_bef.insert_pre = CoreSqlValue.insert_ig_it
            sql_dict = deal_sql_dict(sql_dict)
            list_json = json.loads(callmodel.sql_model.list_json)
            # Step 3: each scheduled task fetches a window of 3 pages (75 records).
            for page in range(1, total_page + 1, 3):
                sql_dict["page"] = total_page
                sql_dict["page_index"] = page
                start = (page - 1) * 25 + 1
                end = (page + 2) * 25
                if end >= max_count:
                    end = max_count
                dic = {"start": start, "end": end, "page_info": list_json["page_info"]}
                sql_dict["list_json"] = json.dumps(dic, ensure_ascii=False)
                di_model_bef.lists.append(sql_dict.copy())
            result.befor_dicts.insert.append(di_model_bef)
        di_model_next = DealInsertModel()
        di_model_next.insert_pre = CoreSqlValue.insert_ig_it

        res = Selector(text=para_dicts["data"]["1_1"]['html'])
        li_list = res.xpath('//record')
        for li in li_list:
            temp = info_dicts.copy()
            temp["task_tag"] = temp["task_tag_next"]
            del temp["task_tag_next"]
            article_json = dict()
            href = li.xpath('.//a/@href').extract_first()
            if not href:
                # Records without a link would raise TypeError on the
                # 'http' in href test below; skip them.
                continue
            base_url = 'http://www.jcs.gov.cn'
            # Some records carry absolute URLs; only prefix relative ones.
            url = href if 'http' in href else base_url + href
            if 'htm' not in url:
                continue
            rawid = re.findall(r'(.*?)\.', url.split('/')[-1])[0]
            temp["rawid"] = rawid
            temp["sub_db_id"] = '99670'
            article_json["url"] = url
            article_json["title"] = li.xpath('.//a/text()').extract_first().strip()
            article_json["pub_date"] = li.xpath('tr/td[3]/text()|b/text()').extract_first().strip()
            temp["article_json"] = json.dumps(article_json, ensure_ascii=False)
            di_model_next.lists.append(temp)

        result.next_dicts.insert.append(di_model_next)

    return result


def policy_jcslist1_callback(callmodel: CallBackModel[PolicyListModel]) -> DealModel:
    """List-page callback for Jinchang (甘肃省金昌市), jpaas publish endpoint.

    The fetched body is JSON whose data.html field contains the rendered
    list table. On page 1 the total page count is derived from the escaped
    count/rows attributes of the raw payload and the remaining pages are
    scheduled by rewriting the URL-encoded pageNo inside page_info; every
    page's table rows become article tasks for the next stage.
    """
    result = DealModel()
    para_dicts = callmodel.para_dicts
    task_info = callmodel.redis_all.parse_dict["1_1"].task_info
    info_dicts = {"task_name": callmodel.sql_model.task_name,
                  "task_tag": callmodel.sql_model.task_tag,
                  "task_tag_next": task_info.task_tag_next}
    if "1_1" in para_dicts["data"]:
        # count/rows are backslash-escaped inside the raw JSON payload,
        # hence the doubled backslashes in the patterns.
        max_count = re.findall('count=\\\\"(\d+)', para_dicts["data"]["1_1"]['html'])
        num = re.findall('rows=\\\\"(\d+)', para_dicts["data"]["1_1"]['html'])
        num = int(num[0]) if num else 10
        max_count = int(max_count[0]) if max_count else 1
        total_page = math.ceil(max_count/num)
        page_index = int(callmodel.sql_model.page_index)
        if page_index == 1:
            sql_dict = callmodel.sql_model.dict()
            di_model_bef = DealInsertModel()
            di_model_bef.insert_pre = CoreSqlValue.insert_ig_it
            sql_dict = deal_sql_dict(sql_dict)
            list_json = json.loads(callmodel.sql_model.list_json)
            for page in range(page_index + 1, total_page+1):
                sql_dict["page"] = total_page
                sql_dict["page_index"] = page
                # Re-target the URL-encoded page number ("pageNo":1) at each page.
                dic = {"page_info": list_json['page_info'].replace('pageNo%22%3A1', f'pageNo%22%3A{page}')}
                sql_dict["list_json"] = json.dumps(dic, ensure_ascii=False)
                # sql_dict["list_json"] = callmodel.sql_model.list_json
                di_model_bef.lists.append(sql_dict.copy())
            result.befor_dicts.insert.append(di_model_bef)
        di_model_next = DealInsertModel()
        di_model_next.insert_pre = CoreSqlValue.insert_ig_it
        html_json = json.loads(para_dicts["data"]["1_1"]['html'])
        res = Selector(text=html_json['data']['html'])
        # if 'jdhy/zcjd' in callmodel.sql_model.list_rawid:
        li_list = res.xpath('//div[@class="page-content"]//table/tbody/tr')
        for li in li_list:
            # Promote each row to the next task stage.
            temp = info_dicts.copy()
            temp["task_tag"] = temp["task_tag_next"]
            del temp["task_tag_next"]
            article_json = dict()
            href = li.xpath('td[@class="table_bt"]/a/@href').extract_first()
            if not href:
                continue
            # base_url = f'http://fgw.guizhou.gov.cn/{callmodel.sql_model.list_rawid}.html'
            base_url = f'http://www.jcs.gov.cn/api-gateway/jpaas-publish-server/front/page/build/unit'
            url = parse.urljoin(base_url, href)
            if 'htm' not in url:
                continue
            # rawid is the article filename without its extension.
            rawid = re.findall('(.*?)\.', url.split('/')[-1])[0]
            temp["rawid"] = rawid
            temp["sub_db_id"] = '99670'
            article_json["url"] = url
            article_json["title"] = li.xpath('td[@class="table_bt"]/a/text()').extract_first().strip()
            article_json["pub_date"] = li.xpath('td[@class="table_fbrq"]/text()').extract_first().strip()
            temp["article_json"] = json.dumps(article_json, ensure_ascii=False)
            di_model_next.lists.append(temp)
        result.next_dicts.insert.append(di_model_next)

    return result


def policy_jcsarticle_callback(callmodel: CallBackModel[PolicyArticleModel]) -> DealModel:
    """Article-stage callback for Jinchang: no per-article work here, returns an empty DealModel."""
    return DealModel()


def policy_jcsarticle_etl_callback(callmodel) -> EtlDealModel:
    """ETL callback for Jinchang (甘肃省金昌市) policy articles.

    Handles the site's two article layouts ("xxgk-table" and
    "table_biaotou"), extracts metadata and the full text, queues
    metadata/full-text rows, and writes attachment info back to the
    source row.

    Raises:
        Exception: if no publication date can be determined, or if the
            full-text container cannot be located in the page.
    """
    result = EtlDealModel()
    save_data = list()

    html = callmodel.para_dicts['data']['1_1']['html']
    article_json = json.loads(callmodel.sql_model.article_json)
    provider_url = article_json['url']
    pub_date = clean_pubdate(article_json['pub_date'])
    pub_year = pub_date[:4]
    res = Selector(text=html)

    # Title fallback chain: on-page heading -> meta tag -> list-page title.
    title = ''.join(res.xpath('//td[@class="title"]/div[@class="text-tag"]//text()').extract()).strip()
    if not title:
        title = ''.join(res.xpath('//meta[@name="ArticleTitle"][1]/@content').extract()).strip()
    if not title:
        title = article_json['title'].strip()
    # The list page may lack a date; try both on-page date layouts.
    if not pub_date:
        pub_date_info = ''.join(res.xpath('//td[contains(text(),"发布日期：")]/text()').extract()).strip()
        pub_date = clean_pubdate(pub_date_info)
        pub_year = pub_date[:4]
    if not pub_date:
        pub_date_info = ''.join(res.xpath('//td[contains(text(),"发布日期")]/following::td[1]/text()').extract()).strip()
        pub_date = clean_pubdate(pub_date_info)
        pub_year = pub_date[:4]
    if not pub_date:
        raise Exception(f'pub_date not found for {provider_url}')
    # Two metadata-table layouts exist; pick the XPaths for the one present.
    if 'class="xxgk-table"' in html:
        pub_no = ''.join(res.xpath('//table[@class="xxgk-table"]//td[contains(text(),"发文字号")]/following::td[1]/text()').extract()).strip()
        index_no = ''.join(res.xpath('//table[@class="xxgk-table"]//td[contains(text(),"索")]/following::td[1]/text()').extract()).strip()
        written_date = ''.join(res.xpath('//table[@class="xxgk-table"]//td[contains(text(),"成文日期")]/following::td[1]/text()').extract()).strip()
        organ = ''.join(res.xpath('//table[@class="xxgk-table"]//td[contains(text(),"制发机构")]/following::td[1]/text()').extract()).strip()
    else:
        pub_no = ''.join(res.xpath('//table[@class="table_biaotou"]//th[contains(text(),"文件编号：")]/following::td[1]/text()').extract()).strip()
        index_no = ''.join(res.xpath('//table[@class="table_biaotou"]//th[contains(text(),"索")]/following::td[1]/text()').extract()).strip()
        written_date = ''.join(res.xpath('//table[@class="table_biaotou"]//th[contains(text(),"成文日期")]/following::td[1]/text()').extract()).strip()
        organ = ''.join(res.xpath('//table[@class="table_biaotou"]//th[contains(text(),"发布机构")]/following::td[1]/text()').extract()).strip()
    # The site abbreviates the organ name ("市..."); restore the city prefix.
    if organ.startswith('市'):
        organ = '金昌' + organ

    fulltext_xpath = '//div[@id="zoom"]|//div[@class="ty-content"]'
    fulltext = res.xpath(fulltext_xpath).extract_first()
    if not fulltext:
        raise Exception(f'fulltext not found for {provider_url}')

    down_date_str = time.strftime('%Y%m%d_%H%M%S', time.localtime())
    sub_db_id = '99670'
    rawid = callmodel.sql_model.rawid
    lngid = BaseLngid().GetLngid(sub_db_id, rawid)
    product = "JCS"
    zt_provider = "jcsgovpolicy"
    data = init_data(rawid, lngid, sub_db_id, down_date_str, product, zt_provider)
    print(lngid)

    data['title'] = title
    data['provider_url'] = provider_url
    data['pub_date'] = clean_pubdate(pub_date)
    data['pub_year'] = pub_year
    data['pub_no'] = pub_no
    data['organ'] = organ
    data['index_no'] = index_no
    data['written_date'] = clean_pubdate(written_date)

    save_data.append({'table': 'policy_latest', 'data': data})
    full_text_data = init_full_text_data(lngid, sub_db_id, down_date_str, fulltext, pub_year)
    save_data.append({'table': 'policy_fulltext_latest', 'data': full_text_data})
    result.save_data = save_data

    # Attachment info (if any) is stored back onto the source row.
    file_info = get_file_info(data, res, f'({fulltext_xpath})')
    di_model_bef = DealUpdateModel()
    if file_info:
        di_model_bef.update.update({"other_dicts": json.dumps(file_info, ensure_ascii=False)})
    else:
        di_model_bef.update.update({"other_dicts": "{}"})
    di_model_bef.where.update({"rawid": callmodel.sql_model.rawid, "task_tag": callmodel.sql_model.task_tag,
                               "task_name": callmodel.sql_model.task_name})
    result.befor_dicts.update_list.append(di_model_bef)
    return result


#   甘肃省白银市
def policy_baiyinlist_callback(callmodel: CallBackModel[PolicyListModel]) -> DealModel:
    """List-page callback for Baiyin (甘肃省白银市), jpaas publish endpoint.

    The fetched body is JSON whose data.html field contains the rendered
    list (ul/li or table layout). On page 1 the total page count is
    derived from the escaped count/rows attributes of the raw payload and
    the remaining pages are scheduled with an unchanged list_json; every
    page's items become article tasks for the next stage.
    """
    result = DealModel()
    para_dicts = callmodel.para_dicts
    task_info = callmodel.redis_all.parse_dict["1_1"].task_info
    info_dicts = {"task_name": callmodel.sql_model.task_name,
                  "task_tag": callmodel.sql_model.task_tag,
                  "task_tag_next": task_info.task_tag_next}
    if "1_1" in para_dicts["data"]:
        # count/rows are backslash-escaped inside the raw JSON payload,
        # hence the doubled backslashes in the patterns.
        max_count = re.findall('count=\\\\"(\d+)', para_dicts["data"]["1_1"]['html'])
        num = re.findall('rows=\\\\"(\d+)', para_dicts["data"]["1_1"]['html'])
        num = int(num[0]) if num else 10
        max_count = int(max_count[0]) if max_count else 1
        total_page = math.ceil(max_count/num)
        page_index = int(callmodel.sql_model.page_index)
        if page_index == 1:
            sql_dict = callmodel.sql_model.dict()
            di_model_bef = DealInsertModel()
            di_model_bef.insert_pre = CoreSqlValue.insert_ig_it
            sql_dict = deal_sql_dict(sql_dict)
            list_json = json.loads(callmodel.sql_model.list_json)
            for page in range(page_index + 1, total_page+1):
                sql_dict["page"] = total_page
                sql_dict["page_index"] = page
                # dic = {"page_info": f"{list_json['page_info']}_{page}"}
                # sql_dict["list_json"] = json.dumps(dic, ensure_ascii=False)
                sql_dict["list_json"] = callmodel.sql_model.list_json
                di_model_bef.lists.append(sql_dict.copy())
            result.befor_dicts.insert.append(di_model_bef)
        di_model_next = DealInsertModel()
        di_model_next.insert_pre = CoreSqlValue.insert_ig_it
        html_json = json.loads(para_dicts["data"]["1_1"]['html'])
        res = Selector(text=html_json['data']['html'])
        # if 'jdhy/zcjd' in callmodel.sql_model.list_rawid:
        li_list = res.xpath('//div[@class="page-content"]//ul/li|//div[@class="page-content"]//table/tr')
        for li in li_list:
            # Promote each item to the next task stage.
            temp = info_dicts.copy()
            temp["task_tag"] = temp["task_tag_next"]
            del temp["task_tag_next"]
            article_json = dict()
            href = li.xpath('.//a/@href').extract_first()
            if not href:
                continue
            # base_url = f'http://fgw.guizhou.gov.cn/{callmodel.sql_model.list_rawid}.html'
            base_url = f'https://www.baiyin.gov.cn/api-gateway/jpaas-publish-server/front/page/build/unit'
            url = parse.urljoin(base_url, href)
            if 'htm' not in url:
                continue
            # rawid is the article filename without its extension.
            rawid = re.findall('(.*?)\.', url.split('/')[-1])[0]
            temp["rawid"] = rawid
            temp["sub_db_id"] = '99671'
            article_json["url"] = url
            article_json["title"] = li.xpath('.//a/text()').extract_first().strip()
            # Date location differs between the ul/li and table layouts.
            pub_date = li.xpath('.//span[@class="listpubdate"]/text()').extract_first()
            if not pub_date:
                pub_date = li.xpath('span/text()|td[6]/text()').extract_first()
            article_json["pub_date"] = pub_date.strip()
            temp["article_json"] = json.dumps(article_json, ensure_ascii=False)
            di_model_next.lists.append(temp)
        result.next_dicts.insert.append(di_model_next)

    return result


def policy_baiyinarticle_callback(callmodel: CallBackModel[PolicyArticleModel]) -> DealModel:
    """Article-stage callback for Baiyin: no per-article work here, returns an empty DealModel."""
    return DealModel()


def policy_baiyinarticle_etl_callback(callmodel) -> EtlDealModel:
    """ETL callback for Baiyin (甘肃省白银市) policy articles.

    Parses the fetched article HTML, extracts the metadata fields and the
    full text, and queues rows for the `policy_latest` and
    `policy_fulltext_latest` tables; any attachment info found in the full
    text is written back to the source row via `other_dicts`.

    Raises:
        Exception: if the full-text container cannot be located.
    """
    result = EtlDealModel()
    save_data = list()

    html = callmodel.para_dicts['data']['1_1']['html']
    article_json = json.loads(callmodel.sql_model.article_json)
    provider_url = article_json['url']
    pub_date = clean_pubdate(article_json['pub_date'])
    pub_year = pub_date[:4]
    res = Selector(text=html)

    # Title: page heading first, then the ArticleTitle meta tag, finally the
    # title captured on the list page.
    title = ''.join(res.xpath('//td[@class="title"]//h1[@class="title text-tag"]//text()').extract()).strip()
    if not title:
        title = ''.join(res.xpath('//meta[@name="ArticleTitle"][1]/@content').extract()).strip()
    if not title:
        title = article_json['title'].strip()
    pub_no = ''.join(res.xpath('//table[@class="xxgk-table"]//td[contains(text(),"文") and contains(text(),"号：")]/following::td[1]/text()').extract()).strip()
    index_no = ''.join(res.xpath('//table[@class="xxgk-table"]//td[contains(text(),"索")]/following::td[1]/text()').extract()).strip()
    written_date = ''.join(res.xpath('//table[@class="xxgk-table"]//td[contains(text(),"生成日期：")]/following::td[1]/text()').extract()).strip()
    legal_status = ''.join(res.xpath('//table[@class="xxgk-table"]//td[contains(text(),"时") and contains(text(),"效：")]/following::td[1]/text()').extract()).strip()
    subject_word = ''.join(res.xpath('//table[@class="xxgk-table"]//td[contains(text(),"关键字：")]/following::td[1]/text()').extract()).strip()
    organ = ''.join(res.xpath('//table[@class="xxgk-table"]//td[contains(text(),"发布机构：")]/following::td[1]/text()').extract()).strip()
    if organ.startswith('市'):
        # Prefix the city name so bare "市..." organs are unambiguous.
        organ = '白银' + organ

    fulltext_xpath = '//div[@id="zoom"]'
    fulltext = res.xpath(fulltext_xpath).extract_first()
    if not fulltext:
        # Fail loudly with context rather than a message-less Exception.
        raise Exception(f'fulltext not found ({fulltext_xpath}): {provider_url}')

    down_date_str = time.strftime('%Y%m%d_%H%M%S', time.localtime())
    sub_db_id = '99671'
    rawid = callmodel.sql_model.rawid
    lngid = BaseLngid().GetLngid(sub_db_id, rawid)
    product = "BAIYIN"
    zt_provider = "baiyingovpolicy"
    data = init_data(rawid, lngid, sub_db_id, down_date_str, product, zt_provider)
    print(lngid)  # progress trace

    data['title'] = title
    data['provider_url'] = provider_url
    data['pub_date'] = pub_date
    data['pub_year'] = pub_year
    data['pub_no'] = pub_no
    data['organ'] = organ
    data['index_no'] = index_no
    data['written_date'] = clean_pubdate(written_date)
    data['subject_word'] = subject_word
    data['legal_status'] = legal_status
    # impl_date / invalid_date / subject are not published on this site.

    save_data.append({'table': 'policy_latest', 'data': data})
    full_text_data = init_full_text_data(lngid, sub_db_id, down_date_str, fulltext, pub_year)
    save_data.append({'table': 'policy_fulltext_latest', 'data': full_text_data})
    result.save_data = save_data

    # Write attachment info (if any) back to the article task row.
    file_info = get_file_info(data, res, f'({fulltext_xpath})')
    di_model_bef = DealUpdateModel()
    if file_info:
        di_model_bef.update.update({"other_dicts": json.dumps(file_info, ensure_ascii=False)})
    else:
        di_model_bef.update.update({"other_dicts": "{}"})
    di_model_bef.where.update({"rawid": callmodel.sql_model.rawid, "task_tag": callmodel.sql_model.task_tag,
                               "task_name": callmodel.sql_model.task_name})
    result.befor_dicts.update_list.append(di_model_bef)
    return result


#   甘肃省天水市
def policy_tianshuilist_callback(callmodel: CallBackModel[PolicyListModel]) -> DealModel:
    """List-stage callback for Tianshui (甘肃省天水市) policy pages.

    On the first page it reads the total record count from <totalrecord>
    and fans out the remaining list pages in windows of three pages
    (25 records per page); every page then extracts each <record> entry
    into an article-stage task row.
    """
    result = DealModel()
    para_dicts = callmodel.para_dicts
    task_info = callmodel.redis_all.parse_dict["1_1"].task_info
    info_dicts = {"task_name": callmodel.sql_model.task_name,
                  "task_tag": callmodel.sql_model.task_tag,
                  "task_tag_next": task_info.task_tag_next}
    if "1_1" in para_dicts["data"]:
        max_count = re.findall(r'<totalrecord>(\d+)</totalrecord>', para_dicts["data"]["1_1"]['html'])
        max_count = int(max_count[0]) if max_count else 1
        total_page = math.ceil(max_count / 25)
        page_index = int(callmodel.sql_model.page_index)
        if page_index == 1:
            # Only the first page enqueues the remaining list pages.
            sql_dict = callmodel.sql_model.dict()
            di_model_bef = DealInsertModel()
            di_model_bef.insert_pre = CoreSqlValue.insert_ig_it
            sql_dict = deal_sql_dict(sql_dict)
            list_json = json.loads(callmodel.sql_model.list_json)
            for page in range(1, total_page + 1, 3):
                # Each task covers a 3-page window (up to 75 records).
                sql_dict["page"] = total_page
                sql_dict["page_index"] = page
                start = (page - 1) * 25 + 1
                end = (page + 2) * 25
                if end >= max_count:
                    end = max_count
                dic = {"start": start, "end": end, "page_info": list_json["page_info"]}
                sql_dict["list_json"] = json.dumps(dic, ensure_ascii=False)
                di_model_bef.lists.append(sql_dict.copy())
            result.befor_dicts.insert.append(di_model_bef)
        di_model_next = DealInsertModel()
        di_model_next.insert_pre = CoreSqlValue.insert_ig_it

        res = Selector(text=para_dicts["data"]["1_1"]['html'])
        li_list = res.xpath('//record')
        for li in li_list:
            temp = info_dicts.copy()
            temp["task_tag"] = temp["task_tag_next"]
            del temp["task_tag_next"]
            article_json = dict()
            href = li.xpath('.//a/@href').extract_first()
            base_url = 'http://www.tianshui.gov.cn'
            url = href if 'http' in href else base_url + href
            if 'htm' not in url:
                continue
            # rawid = the page's file name without its extension.
            rawid = re.findall(r'(.*?)\.', url.split('/')[-1])[0]
            temp["rawid"] = rawid
            temp["sub_db_id"] = '99672'
            article_json["url"] = url
            article_json["title"] = li.xpath('.//a/text()').extract_first().strip()
            article_json["pub_date"] = li.xpath('span[@class="bt-data-time"]/text()').extract_first().strip()
            temp["article_json"] = json.dumps(article_json, ensure_ascii=False)
            di_model_next.lists.append(temp)

        result.next_dicts.insert.append(di_model_next)

    return result


def policy_tianshuilist1_callback(callmodel: CallBackModel[PolicyListModel]) -> DealModel:
    """List-stage callback for Tianshui pages whose listing is embedded in
    JavaScript arrays (`urls[i]=...`, `headers[i]=...`, `year/month/day[i]=...`)
    instead of HTML; each JS entry becomes an article-stage task row.
    """
    result = DealModel()
    para_dicts = callmodel.para_dicts
    task_info = callmodel.redis_all.parse_dict["1_1"].task_info
    info_dicts = {"task_name": callmodel.sql_model.task_name,
                  "task_tag": callmodel.sql_model.task_tag,
                  "task_tag_next": task_info.task_tag_next}
    if "1_1" in para_dicts["data"]:
        di_model_next = DealInsertModel()
        di_model_next.insert_pre = CoreSqlValue.insert_ig_it
        # One match per "urls[i]=...; ...; i++;" JS statement group.
        li_list = re.findall(r'urls\[i\]=.*?i\+\+;', para_dicts["data"]["1_1"]['html'])
        for li in li_list:
            temp = info_dicts.copy()
            temp["task_tag"] = temp["task_tag_next"]
            del temp["task_tag_next"]
            article_json = dict()
            href = re.findall(r"urls\[i\]='(.*?)'", li)[0]
            base_url = 'http://www.tianshui.gov.cn'
            url = href if 'http' in href else base_url + href
            if 'htm' not in url:
                continue
            # rawid = the page's file name without its extension.
            rawid = re.findall(r'(.*?)\.', url.split('/')[-1])[0]
            temp["rawid"] = rawid
            temp["sub_db_id"] = '99672'
            article_json["url"] = url
            article_json["title"] = re.findall(r'headers\[i\]="(.*?)"', li)[0].strip()
            year = re.findall(r"year\[i\]='(.*?)'", li)[0].strip()
            month = re.findall(r"month\[i\]='(.*?)'", li)[0].strip()
            day = re.findall(r"day\[i\]='(.*?)'", li)[0].strip()
            article_json["pub_date"] = f"{year}{month}{day}"
            temp["article_json"] = json.dumps(article_json, ensure_ascii=False)
            di_model_next.lists.append(temp)
        result.next_dicts.insert.append(di_model_next)

    return result


def policy_tianshuiarticle_callback(callmodel: CallBackModel[PolicyArticleModel]) -> DealModel:
    """Article-stage callback for Tianshui: nothing further to queue after
    the fetch, so an empty DealModel is returned."""
    return DealModel()


def policy_tianshuiarticle_etl_callback(callmodel) -> EtlDealModel:
    """ETL callback for Tianshui (甘肃省天水市) policy articles.

    Parses the fetched article HTML, extracts the metadata fields and the
    full text, and queues rows for the `policy_latest` and
    `policy_fulltext_latest` tables; any attachment info found in the full
    text is written back to the source row via `other_dicts`.

    Raises:
        Exception: if the full-text container cannot be located.
    """
    result = EtlDealModel()
    save_data = list()

    html = callmodel.para_dicts['data']['1_1']['html']
    article_json = json.loads(callmodel.sql_model.article_json)
    provider_url = article_json['url']
    pub_date = clean_pubdate(article_json['pub_date'])
    pub_year = pub_date[:4]
    res = Selector(text=html)

    # Title: page heading first, falling back to the list-page title.
    title = ''.join(res.xpath('//p[contains(@class,"con-title1")]//text()').extract()).strip()
    if not title:
        title = article_json['title'].strip()
    pub_no = ''.join(res.xpath('//div[@class="con "]//b[contains(text(),"发文字号：")]/ancestor::td[1]/text()').extract()).strip()
    index_no = ''.join(res.xpath('//div[@class="con "]//span[contains(text(),"索 引 号：")]/ancestor::td[1]/text()').extract()).strip()
    written_date = ''.join(res.xpath('//div[@class="con "]//b[contains(text(),"成文日期：")]/ancestor::td[1]/text()').extract()).strip()
    organ = ''.join(res.xpath('//div[@class="con "]//b[contains(text(),"发布机构：")]/ancestor::td[1]/text()').extract()).strip()
    if organ.startswith('市'):
        # Prefix the city name so bare "市..." organs are unambiguous.
        organ = '天水' + organ

    fulltext_xpath = '//div[contains(@class,"main-txt1")]'
    fulltext = res.xpath(fulltext_xpath).extract_first()
    if not fulltext:
        # Fail loudly with context rather than a message-less Exception.
        raise Exception(f'fulltext not found ({fulltext_xpath}): {provider_url}')

    down_date_str = time.strftime('%Y%m%d_%H%M%S', time.localtime())
    sub_db_id = '99672'
    rawid = callmodel.sql_model.rawid
    lngid = BaseLngid().GetLngid(sub_db_id, rawid)
    product = "TIANSHUI"
    zt_provider = "tianshuigovpolicy"
    data = init_data(rawid, lngid, sub_db_id, down_date_str, product, zt_provider)
    print(lngid)  # progress trace

    data['title'] = title
    data['provider_url'] = provider_url
    # pub_date was already cleaned above; store it directly, matching the
    # sibling ETL callbacks (the previous double clean_pubdate was redundant).
    data['pub_date'] = pub_date
    data['pub_year'] = pub_year
    data['pub_no'] = pub_no
    data['organ'] = organ
    data['index_no'] = index_no
    data['written_date'] = clean_pubdate(written_date)
    # impl_date / invalid_date / subject / subject_word / legal_status are
    # not published on this site.

    save_data.append({'table': 'policy_latest', 'data': data})
    full_text_data = init_full_text_data(lngid, sub_db_id, down_date_str, fulltext, pub_year)
    save_data.append({'table': 'policy_fulltext_latest', 'data': full_text_data})
    result.save_data = save_data

    # Write attachment info (if any) back to the article task row.
    file_info = get_file_info(data, res, f'({fulltext_xpath})')
    di_model_bef = DealUpdateModel()
    if file_info:
        di_model_bef.update.update({"other_dicts": json.dumps(file_info, ensure_ascii=False)})
    else:
        di_model_bef.update.update({"other_dicts": "{}"})
    di_model_bef.where.update({"rawid": callmodel.sql_model.rawid, "task_tag": callmodel.sql_model.task_tag,
                               "task_name": callmodel.sql_model.task_name})
    result.befor_dicts.update_list.append(di_model_bef)
    return result


#   甘肃省武威市
def policy_gswuweilist_callback(callmodel: CallBackModel[PolicyListModel]) -> DealModel:
    """List-stage callback for Wuwei (甘肃省武威市) policy pages.

    On the first page it reads the total record count from <totalrecord>
    and fans out the remaining list pages in windows of three pages
    (25 records per page); every page then extracts each <record> entry
    into an article-stage task row. The publication-date xpath depends on
    which listing column (list_rawid) is being crawled.
    """
    result = DealModel()
    para_dicts = callmodel.para_dicts
    task_info = callmodel.redis_all.parse_dict["1_1"].task_info
    info_dicts = {"task_name": callmodel.sql_model.task_name,
                  "task_tag": callmodel.sql_model.task_tag,
                  "task_tag_next": task_info.task_tag_next}
    if "1_1" in para_dicts["data"]:
        max_count = re.findall(r'<totalrecord>(\d+)</totalrecord>', para_dicts["data"]["1_1"]['html'])
        max_count = int(max_count[0]) if max_count else 1
        total_page = math.ceil(max_count / 25)
        page_index = int(callmodel.sql_model.page_index)
        if page_index == 1:
            # Only the first page enqueues the remaining list pages.
            sql_dict = callmodel.sql_model.dict()
            di_model_bef = DealInsertModel()
            di_model_bef.insert_pre = CoreSqlValue.insert_ig_it
            sql_dict = deal_sql_dict(sql_dict)
            list_json = json.loads(callmodel.sql_model.list_json)
            for page in range(1, total_page + 1, 3):
                # Each task covers a 3-page window (up to 75 records).
                sql_dict["page"] = total_page
                sql_dict["page_index"] = page
                start = (page - 1) * 25 + 1
                end = (page + 2) * 25
                if end >= max_count:
                    end = max_count
                dic = {"start": start, "end": end, "page_info": list_json["page_info"]}
                sql_dict["list_json"] = json.dumps(dic, ensure_ascii=False)
                di_model_bef.lists.append(sql_dict.copy())
            result.befor_dicts.insert.append(di_model_bef)
        di_model_next = DealInsertModel()
        di_model_next.insert_pre = CoreSqlValue.insert_ig_it

        res = Selector(text=para_dicts["data"]["1_1"]['html'])
        li_list = res.xpath('//record')
        for li in li_list:
            temp = info_dicts.copy()
            temp["task_tag"] = temp["task_tag_next"]
            del temp["task_tag_next"]
            article_json = dict()
            href = li.xpath('.//a/@href').extract_first()
            base_url = 'https://www.gswuwei.gov.cn'
            url = href if 'http' in href else base_url + href
            if 'htm' not in url:
                continue
            # rawid = the page's file name without its extension.
            rawid = re.findall(r'(.*?)\.', url.split('/')[-1])[0]
            temp["rawid"] = rawid
            temp["sub_db_id"] = '99673'
            article_json["url"] = url
            article_json["title"] = li.xpath('.//a/text()').extract_first().strip()
            if '177' == callmodel.sql_model.list_rawid:
                pub_date = ''.join(li.xpath('span[@class="ipnone"]//text()').extract())
            elif '17977' == callmodel.sql_model.list_rawid:
                pub_date = li.xpath('div/p[@class="fgk_yxsj"]/a/text()').extract_first().strip()
            else:
                pub_date = li.xpath('span/text()').extract_first().strip()
            article_json["pub_date"] = pub_date.strip()
            temp["article_json"] = json.dumps(article_json, ensure_ascii=False)
            di_model_next.lists.append(temp)

        result.next_dicts.insert.append(di_model_next)

    return result


def policy_gswuweiarticle_callback(callmodel: CallBackModel[PolicyArticleModel]) -> DealModel:
    """Article-stage callback for Wuwei: nothing further to queue after the
    fetch, so an empty DealModel is returned."""
    return DealModel()


def policy_gswuweiarticle_etl_callback(callmodel) -> EtlDealModel:
    """ETL callback for Wuwei (甘肃省武威市) policy articles.

    Parses the fetched article HTML, extracts the metadata fields and the
    full text, and queues rows for the `policy_latest` and
    `policy_fulltext_latest` tables; any attachment info found in the full
    text is written back to the source row via `other_dicts`.

    Raises:
        Exception: if the full-text container cannot be located.
    """
    result = EtlDealModel()
    save_data = list()

    html = callmodel.para_dicts['data']['1_1']['html']
    article_json = json.loads(callmodel.sql_model.article_json)
    provider_url = article_json['url']
    pub_date = clean_pubdate(article_json['pub_date'])
    pub_year = pub_date[:4]
    res = Selector(text=html)

    # Title: two known page layouts, falling back to the list-page title.
    title = ''.join(res.xpath('//span[@class="biaoti"]//text()|//div[@id="title"]//text()').extract()).strip()
    if not title:
        title = article_json['title'].strip()
    pub_no = ''.join(res.xpath('//table//td[contains(text(),"文件编号:")]/following::td[1]/text()').extract()).strip()
    index_no = ''.join(res.xpath('//table//td[contains(text(),"索引号:")]/following::td[1]/text()').extract()).strip()
    written_date = ''.join(res.xpath('//table//td[contains(text(),"成文日期:")]/following::td[1]/text()').extract()).strip()
    legal_status = ''.join(res.xpath('//table//td[contains(text(),"有效性:")]/following::td[1]/text()').extract()).strip()
    organ = ''.join(res.xpath('//table//td[contains(text(),"发布机构:")]/following::td[1]/text()').extract()).strip()
    if organ.startswith('市'):
        # Prefix the city name so bare "市..." organs are unambiguous.
        organ = '武威' + organ

    fulltext_xpath = '//div[@id="Zoom"]'
    fulltext = res.xpath(fulltext_xpath).extract_first()
    if not fulltext:
        # Fail loudly with context rather than a message-less Exception.
        raise Exception(f'fulltext not found ({fulltext_xpath}): {provider_url}')

    down_date_str = time.strftime('%Y%m%d_%H%M%S', time.localtime())
    sub_db_id = '99673'
    rawid = callmodel.sql_model.rawid
    lngid = BaseLngid().GetLngid(sub_db_id, rawid)
    product = "GSWUWEI"
    zt_provider = "gswuweigovpolicy"
    data = init_data(rawid, lngid, sub_db_id, down_date_str, product, zt_provider)
    print(lngid)  # progress trace

    data['title'] = title
    data['provider_url'] = provider_url
    # pub_date was already cleaned above; store it directly, matching the
    # sibling ETL callbacks (the previous double clean_pubdate was redundant).
    data['pub_date'] = pub_date
    data['pub_year'] = pub_year
    data['pub_no'] = pub_no
    data['organ'] = organ
    data['index_no'] = index_no
    data['written_date'] = clean_pubdate(written_date)
    data['legal_status'] = legal_status
    # impl_date / invalid_date / subject / subject_word are not published
    # on this site.

    save_data.append({'table': 'policy_latest', 'data': data})
    full_text_data = init_full_text_data(lngid, sub_db_id, down_date_str, fulltext, pub_year)
    save_data.append({'table': 'policy_fulltext_latest', 'data': full_text_data})
    result.save_data = save_data

    # Write attachment info (if any) back to the article task row.
    file_info = get_file_info(data, res, f'({fulltext_xpath})')
    di_model_bef = DealUpdateModel()
    if file_info:
        di_model_bef.update.update({"other_dicts": json.dumps(file_info, ensure_ascii=False)})
    else:
        di_model_bef.update.update({"other_dicts": "{}"})
    di_model_bef.where.update({"rawid": callmodel.sql_model.rawid, "task_tag": callmodel.sql_model.task_tag,
                               "task_name": callmodel.sql_model.task_name})
    result.befor_dicts.update_list.append(di_model_bef)
    return result


#   甘肃省张掖市
def policy_zhangyelist_callback(callmodel: CallBackModel[PolicyListModel]) -> DealModel:
    """List-stage callback for Zhangye (甘肃省张掖市) policy pages.

    Page indexing is zero-based on this site: page 0 reads `countPage` and
    fans out pages 1..countPage-1; every page then extracts its list items
    (several listing layouts are in use) into article-stage task rows.
    """
    result = DealModel()
    para_dicts = callmodel.para_dicts
    task_info = callmodel.redis_all.parse_dict["1_1"].task_info
    info_dicts = {"task_name": callmodel.sql_model.task_name,
                  "task_tag": callmodel.sql_model.task_tag,
                  "task_tag_next": task_info.task_tag_next}
    if "1_1" in para_dicts["data"]:
        max_count = re.findall(r'countPage = (\d+)', para_dicts["data"]["1_1"]['html'])
        max_count = int(max_count[0]) if max_count else 1
        total_page = max_count
        page_index = int(callmodel.sql_model.page_index)
        if page_index == 0:
            # Only the first (zero-indexed) page enqueues the rest.
            sql_dict = callmodel.sql_model.dict()
            di_model_bef = DealInsertModel()
            di_model_bef.insert_pre = CoreSqlValue.insert_ig_it
            sql_dict = deal_sql_dict(sql_dict)
            list_json = json.loads(callmodel.sql_model.list_json)
            for page in range(page_index + 1, total_page):
                sql_dict["page"] = total_page
                sql_dict["page_index"] = page
                # The page number is appended to page_info to build the
                # paged URL key.
                dic = {"page_info": f"{list_json['page_info']}_{page}"}
                sql_dict["list_json"] = json.dumps(dic, ensure_ascii=False)
                di_model_bef.lists.append(sql_dict.copy())
            result.befor_dicts.insert.append(di_model_bef)
        di_model_next = DealInsertModel()
        di_model_next.insert_pre = CoreSqlValue.insert_ig_it
        res = Selector(text=para_dicts["data"]["1_1"]['html'])
        # Match all three listing layouts used across columns.
        li_list = res.xpath('//div[@class="zfxxgk_zdgkc"]/ul/li|//div[@class="content"]/div|//div[@class="zfxxgk_zd2"]/ul/li')
        for li in li_list:
            temp = info_dicts.copy()
            temp["task_tag"] = temp["task_tag_next"]
            del temp["task_tag_next"]
            article_json = dict()
            href = li.xpath('a/@href').extract_first()
            if not href:
                continue
            base_url = f'http://www.zhangye.gov.cn/{callmodel.sql_model.list_rawid}/index.html'
            url = parse.urljoin(base_url, href)
            if 'htm' not in url:
                continue
            # rawid = the page's file name without its extension.
            rawid = re.findall(r'(.*?)\.', url.split('/')[-1])[0]
            temp["rawid"] = rawid
            temp["sub_db_id"] = '99674'
            article_json["url"] = url
            title = li.xpath('a/span[@class="bt"]/text()').extract_first()
            if not title:
                title = li.xpath('a/text()').extract_first()
            article_json["title"] = title.strip()
            article_json["pub_date"] = li.xpath('a/span[@class="data"]/text()|b/text()|span[@class="listpubdate"]/text()').extract_first().strip()
            temp["article_json"] = json.dumps(article_json, ensure_ascii=False)
            di_model_next.lists.append(temp)
        result.next_dicts.insert.append(di_model_next)

    return result


def policy_zhangyearticle_callback(callmodel: CallBackModel[PolicyArticleModel]) -> DealModel:
    """Article-stage callback for Zhangye: nothing further to queue after
    the fetch, so an empty DealModel is returned."""
    return DealModel()


def policy_zhangyearticle_etl_callback(callmodel) -> EtlDealModel:
    """ETL callback for Zhangye (甘肃省张掖市) policy articles.

    Parses the fetched article HTML, extracts the metadata fields and the
    full text, and queues rows for the `policy_latest` and
    `policy_fulltext_latest` tables; any attachment info found in the full
    text is written back to the source row via `other_dicts`.

    Raises:
        Exception: if the full-text container cannot be located.
    """
    result = EtlDealModel()
    save_data = list()

    html = callmodel.para_dicts['data']['1_1']['html']
    article_json = json.loads(callmodel.sql_model.article_json)
    provider_url = article_json['url']
    pub_date = clean_pubdate(article_json['pub_date'])
    pub_year = pub_date[:4]
    res = Selector(text=html)

    # Title: page heading first, falling back to the list-page title.
    title = ''.join(res.xpath('//div[@class="hx-xxgkxl-top"]/h1//text()').extract()).strip()
    if not title:
        title = article_json['title'].strip()
    pub_no = ''.join(res.xpath('//div[@class="hx-xxgkxl-wzxx"]//span[contains(text(),"文号")]/following::span[1]/text()').extract()).strip()
    index_no = ''.join(res.xpath('//div[@class="hx-xxgkxl-wzxx"]//span[contains(text(),"索引号")]/following::span[1]/text()').extract()).strip()
    legal_status = ''.join(res.xpath('//div[@class="hx-xxgkxl-wzxx"]//span[contains(text(),"是否有效")]/following::span[1]/text()').extract()).strip()
    # The site publishes a yes/no flag; map it to the pipeline's wording.
    legal_status = '有效' if legal_status == '是' else ''
    subject_word = ''.join(res.xpath('//div[@class="hx-xxgkxl-wzxx"]//span[contains(text(),"关键词")]/following::span[1]/text()').extract()).strip()
    written_date = ''.join(res.xpath('//div[@class="hx-xxgkxl-wzxx"]//span[contains(text(),"生成日期")]/following::span[1]/text()').extract()).strip()
    organ = ''.join(res.xpath('//div[@class="hx-xxgkxl-wzxx"]//span[contains(text(),"发布机构")]/following::span[1]/text()').extract()).strip()
    if organ.startswith('市'):
        # Prefix the city name so bare "市..." organs are unambiguous.
        organ = '张掖' + organ

    fulltext_xpath = '//div[contains(@class,"hx-xxgkxl-wznr")]'
    fulltext = res.xpath(fulltext_xpath).extract_first()
    if not fulltext:
        # Fail loudly with context rather than a message-less Exception.
        raise Exception(f'fulltext not found ({fulltext_xpath}): {provider_url}')

    down_date_str = time.strftime('%Y%m%d_%H%M%S', time.localtime())
    sub_db_id = '99674'
    rawid = callmodel.sql_model.rawid
    lngid = BaseLngid().GetLngid(sub_db_id, rawid)
    product = "ZHANGYE"
    zt_provider = "zhangyegovpolicy"
    data = init_data(rawid, lngid, sub_db_id, down_date_str, product, zt_provider)
    print(lngid)  # progress trace

    data['title'] = title
    data['provider_url'] = provider_url
    data['pub_date'] = pub_date
    data['pub_year'] = pub_year
    data['pub_no'] = pub_no
    data['organ'] = organ
    data['index_no'] = index_no
    data['written_date'] = clean_pubdate(written_date)
    data['subject_word'] = subject_word
    data['legal_status'] = legal_status
    # impl_date / invalid_date / subject are not published on this site.

    save_data.append({'table': 'policy_latest', 'data': data})
    full_text_data = init_full_text_data(lngid, sub_db_id, down_date_str, fulltext, pub_year)
    save_data.append({'table': 'policy_fulltext_latest', 'data': full_text_data})
    result.save_data = save_data

    # Write attachment info (if any) back to the article task row.
    file_info = get_file_info(data, res, f'({fulltext_xpath})')
    di_model_bef = DealUpdateModel()
    if file_info:
        di_model_bef.update.update({"other_dicts": json.dumps(file_info, ensure_ascii=False)})
    else:
        di_model_bef.update.update({"other_dicts": "{}"})
    di_model_bef.where.update({"rawid": callmodel.sql_model.rawid, "task_tag": callmodel.sql_model.task_tag,
                               "task_name": callmodel.sql_model.task_name})
    result.befor_dicts.update_list.append(di_model_bef)
    return result


#   甘肃省平凉市
def policy_pinglianglist_callback(callmodel: CallBackModel[PolicyListModel]) -> DealModel:
    """List-stage callback for Pingliang (甘肃省平凉市) policy pages.

    The listing HTML arrives wrapped in a JSON envelope. Page 1 reads the
    JSON-escaped `count`/`rows` attributes to compute the page total and
    fans out the remaining pages; every page then extracts its list items
    into article-stage task rows.
    """
    result = DealModel()
    para_dicts = callmodel.para_dicts
    task_info = callmodel.redis_all.parse_dict["1_1"].task_info
    info_dicts = {"task_name": callmodel.sql_model.task_name,
                  "task_tag": callmodel.sql_model.task_tag,
                  "task_tag_next": task_info.task_tag_next}
    if "1_1" in para_dicts["data"]:
        # The attributes sit inside a JSON-escaped HTML string, hence the
        # literal backslash before the quote in each pattern.
        max_count = re.findall(r'count=\\"(\d+)', para_dicts["data"]["1_1"]['html'])
        num = re.findall(r'rows=\\"(\d+)', para_dicts["data"]["1_1"]['html'])
        num = int(num[0]) if num else 10
        max_count = int(max_count[0]) if max_count else 1
        total_page = math.ceil(max_count / num)
        page_index = int(callmodel.sql_model.page_index)
        if page_index == 1:
            # Only the first page enqueues the remaining list pages.
            sql_dict = callmodel.sql_model.dict()
            di_model_bef = DealInsertModel()
            di_model_bef.insert_pre = CoreSqlValue.insert_ig_it
            sql_dict = deal_sql_dict(sql_dict)
            for page in range(page_index + 1, total_page + 1):
                sql_dict["page"] = total_page
                sql_dict["page_index"] = page
                # Every page reuses the same list_json payload.
                sql_dict["list_json"] = callmodel.sql_model.list_json
                di_model_bef.lists.append(sql_dict.copy())
            result.befor_dicts.insert.append(di_model_bef)
        di_model_next = DealInsertModel()
        di_model_next.insert_pre = CoreSqlValue.insert_ig_it
        html_json = json.loads(para_dicts["data"]["1_1"]['html'])
        res = Selector(text=html_json['data']['html'])
        li_list = res.xpath('//div[@class="page-content"]//ul/li|//div[@class="page-content"]//table/tr')
        for li in li_list:
            temp = info_dicts.copy()
            temp["task_tag"] = temp["task_tag_next"]
            del temp["task_tag_next"]
            article_json = dict()
            href = li.xpath('.//a/@href').extract_first()
            if not href:
                continue
            base_url = 'http://www.pingliang.gov.cn/api-gateway/jpaas-publish-server/front/page/build/unit'
            url = parse.urljoin(base_url, href)
            if 'htm' not in url:
                continue
            # rawid = the page's file name without its extension.
            rawid = re.findall(r'(.*?)\.', url.split('/')[-1])[0]
            temp["rawid"] = rawid
            temp["sub_db_id"] = '99675'
            article_json["url"] = url
            article_json["title"] = li.xpath('.//a/text()').extract_first().strip()
            if 'b96a6359619d40e3b79172c4ee09dca3' in callmodel.sql_model.list_rawid:
                # This column shows no date on the list page; the article
                # ETL stage extracts it from the article itself.
                article_json["pub_date"] = ''
            else:
                article_json["pub_date"] = li.xpath('span/text()').extract_first().strip()
            temp["article_json"] = json.dumps(article_json, ensure_ascii=False)
            di_model_next.lists.append(temp)
        result.next_dicts.insert.append(di_model_next)

    return result


def policy_pingliangarticle_callback(callmodel: CallBackModel[PolicyArticleModel]) -> DealModel:
    """Article-stage callback for Pingliang: nothing further to queue after
    the fetch, so an empty DealModel is returned."""
    return DealModel()


def policy_pingliangarticle_etl_callback(callmodel) -> EtlDealModel:
    """ETL callback for Pingliang (甘肃省平凉市) policy articles.

    Parses the fetched article HTML, extracts the title/date and the full
    text, and queues rows for the `policy_latest` and
    `policy_fulltext_latest` tables; any attachment info found in the full
    text is written back to the source row via `other_dicts`.

    Raises:
        Exception: if the full-text container cannot be located.
    """
    result = EtlDealModel()
    save_data = list()

    html = callmodel.para_dicts['data']['1_1']['html']
    article_json = json.loads(callmodel.sql_model.article_json)
    provider_url = article_json['url']
    pub_date = clean_pubdate(article_json['pub_date'])
    pub_year = pub_date[:4]
    res = Selector(text=html)

    # Title: page heading, then the ArticleTitle meta tag, finally the
    # title captured on the list page.
    title = ''.join(res.xpath('//h1[@class="content_h1 text-tag canRead"]//text()').extract()).strip()
    if not title:
        title = ''.join(res.xpath('//meta[@name="ArticleTitle"][1]/@content').extract()).strip()
    if not title:
        title = article_json['title'].strip()
    if not pub_date:
        # Some listing columns supply no date; recover it from the article.
        pub_date_info = ''.join(res.xpath('//li[contains(text(),"时间：")]/text()').extract()).strip()
        pub_date = clean_pubdate(pub_date_info)
        pub_year = pub_date[:4]

    fulltext_xpath = '//div[@class="zw_content"]'
    fulltext = res.xpath(fulltext_xpath).extract_first()
    if not fulltext:
        # Fail loudly with context rather than a message-less Exception.
        raise Exception(f'fulltext not found ({fulltext_xpath}): {provider_url}')

    down_date_str = time.strftime('%Y%m%d_%H%M%S', time.localtime())
    sub_db_id = '99675'
    rawid = callmodel.sql_model.rawid
    lngid = BaseLngid().GetLngid(sub_db_id, rawid)
    product = "PINGLIANG"
    zt_provider = "pinglianggovpolicy"
    data = init_data(rawid, lngid, sub_db_id, down_date_str, product, zt_provider)
    print(lngid)  # progress trace

    data['title'] = title
    data['provider_url'] = provider_url
    data['pub_date'] = pub_date
    data['pub_year'] = pub_year
    # pub_no / organ / index_no / written_date / impl_date / invalid_date /
    # subject / subject_word / legal_status are not published on this site.

    save_data.append({'table': 'policy_latest', 'data': data})
    full_text_data = init_full_text_data(lngid, sub_db_id, down_date_str, fulltext, pub_year)
    save_data.append({'table': 'policy_fulltext_latest', 'data': full_text_data})
    result.save_data = save_data

    # Write attachment info (if any) back to the article task row.
    file_info = get_file_info(data, res, f'({fulltext_xpath})')
    di_model_bef = DealUpdateModel()
    if file_info:
        di_model_bef.update.update({"other_dicts": json.dumps(file_info, ensure_ascii=False)})
    else:
        di_model_bef.update.update({"other_dicts": "{}"})
    di_model_bef.where.update({"rawid": callmodel.sql_model.rawid, "task_tag": callmodel.sql_model.task_tag,
                               "task_name": callmodel.sql_model.task_name})
    result.befor_dicts.update_list.append(di_model_bef)
    return result


#   甘肃省酒泉市
def policy_jiuquanlist_callback(callmodel: CallBackModel[PolicyListModel]) -> DealModel:
    """List-page callback for Jiuquan (酒泉市) gov policy search API.

    Parses the JSON search response; on page 1 it fans out insert rows for
    every remaining list page, then queues one article task per result row.

    Raises:
        Exception: when the response carries no result rows.
    """
    result = DealModel()
    para_dicts = callmodel.para_dicts
    task_info = callmodel.redis_all.parse_dict["1_1"].task_info
    info_dicts = {"task_name": callmodel.sql_model.task_name,
                  "task_tag": callmodel.sql_model.task_tag,
                  "task_tag_next": task_info.task_tag_next}
    if "1_1" in para_dicts["data"]:
        html_json = json.loads(para_dicts["data"]["1_1"]['html'])
        max_count = html_json['data']['total']
        num = html_json['data']['rows']
        total_page = math.ceil(max_count / num)
        page_index = int(callmodel.sql_model.page_index)
        if page_index == 1:
            # Only the first page schedules the rest, to avoid duplicates.
            sql_dict = callmodel.sql_model.dict()
            di_model_bef = DealInsertModel()
            di_model_bef.insert_pre = CoreSqlValue.insert_ig_it
            sql_dict = deal_sql_dict(sql_dict)
            for page in range(page_index + 1, total_page + 1):
                sql_dict["page"] = total_page
                sql_dict["page_index"] = page
                sql_dict["list_json"] = callmodel.sql_model.list_json
                di_model_bef.lists.append(sql_dict.copy())
            result.befor_dicts.insert.append(di_model_bef)
        di_model_next = DealInsertModel()
        di_model_next.insert_pre = CoreSqlValue.insert_ig_it

        li_list = html_json['data']['results']
        if not li_list:
            raise Exception("jiuquan list page: empty result set")
        for li in li_list:
            temp = info_dicts.copy()
            temp["task_tag"] = temp["task_tag_next"]
            del temp["task_tag_next"]
            article_json = dict()
            href = li['url']
            base_url = f'http://www.jiuquan.gov.cn/common/search/{callmodel.sql_model.list_rawid}'
            url = parse.urljoin(base_url, href)
            if 'htm' not in url:
                continue
            # rawid = file name of the article URL without its extension.
            # Raw string fixes the invalid "\." escape of the original pattern.
            rawid = re.findall(r'(.*?)\.', url.split('/')[-1])[0]
            temp["rawid"] = rawid
            temp["sub_db_id"] = '99676'
            article_json["url"] = url
            article_json["title"] = li['title']
            article_json["pub_date"] = li['publishedTimeStr']
            temp["article_json"] = json.dumps(article_json, ensure_ascii=False)
            di_model_next.lists.append(temp)
        result.next_dicts.insert.append(di_model_next)

    return result


def policy_jiuquanarticle_callback(callmodel: CallBackModel[PolicyArticleModel]) -> DealModel:
    """Article-stage callback for Jiuquan; no extra processing is required."""
    return DealModel()


def policy_jiuquanarticle_etl_callback(callmodel) -> EtlDealModel:
    """ETL callback for Jiuquan (酒泉市) policy article pages.

    Builds one `policy_latest` metadata row and one `policy_fulltext_latest`
    row from the fetched article HTML, and records attachment info back on
    the source row.

    Raises:
        Exception: when the full-text node is missing from the page.
    """
    result = EtlDealModel()
    save_data = list()

    html = callmodel.para_dicts['data']['1_1']['html']
    article_json = json.loads(callmodel.sql_model.article_json)
    title = article_json['title']
    provider_url = article_json['url']
    pub_date = clean_pubdate(article_json['pub_date'])
    pub_year = pub_date[:4]
    res = Selector(text=html)

    fulltext_xpath = '//div[@id="UCAP-CONTENT"]'
    fulltext = res.xpath(fulltext_xpath).extract_first()
    if not fulltext:
        # Fail loudly so the task is retried instead of saving an empty body.
        raise Exception("jiuquan article: fulltext node not found")

    down_date_str = time.strftime('%Y%m%d_%H%M%S', time.localtime())
    sub_db_id = '99676'
    rawid = callmodel.sql_model.rawid
    lngid = BaseLngid().GetLngid(sub_db_id, rawid)
    product = "JIUQUAN"
    zt_provider = "jiuquangovpolicy"
    data = init_data(rawid, lngid, sub_db_id, down_date_str, product, zt_provider)

    data['title'] = title
    data['provider_url'] = provider_url
    data['pub_date'] = pub_date
    data['pub_year'] = pub_year

    save_data.append({'table': 'policy_latest', 'data': data})
    full_text_data = init_full_text_data(lngid, sub_db_id, down_date_str, fulltext, pub_year)
    save_data.append({'table': 'policy_fulltext_latest', 'data': full_text_data})
    result.save_data = save_data

    # Store attachment info (or an empty JSON object) on the source row.
    file_info = get_file_info(data, res, f'({fulltext_xpath})')
    di_model_bef = DealUpdateModel()
    if file_info:
        di_model_bef.update.update({"other_dicts": json.dumps(file_info, ensure_ascii=False)})
    else:
        di_model_bef.update.update({"other_dicts": "{}"})
    di_model_bef.where.update({"rawid": callmodel.sql_model.rawid, "task_tag": callmodel.sql_model.task_tag,
                               "task_name": callmodel.sql_model.task_name})
    result.befor_dicts.update_list.append(di_model_bef)
    return result


#   甘肃省庆阳市
def policy_zgqingyanglist_callback(callmodel: CallBackModel[PolicyListModel]) -> DealModel:
    """List-page callback for Qingyang (庆阳市) gov policy pages.

    Reads the total page count from the list HTML; on page 1 it fans out
    insert rows for every remaining page, then queues one article task per
    list entry.
    """
    result = DealModel()
    para_dicts = callmodel.para_dicts
    task_info = callmodel.redis_all.parse_dict["1_1"].task_info
    info_dicts = {"task_name": callmodel.sql_model.task_name,
                  "task_tag": callmodel.sql_model.task_tag,
                  "task_tag_next": task_info.task_tag_next}
    if "1_1" in para_dicts["data"]:
        # Raw string fixes the invalid "\d" escape of the original pattern.
        max_count = re.findall(r'/共(\d+)页<', para_dicts["data"]["1_1"]['html'])
        total_page = int(max_count[0]) if max_count else 1
        page_index = int(callmodel.sql_model.page_index)
        if page_index == 1:
            # Only the first page schedules the rest, to avoid duplicates.
            sql_dict = callmodel.sql_model.dict()
            di_model_bef = DealInsertModel()
            di_model_bef.insert_pre = CoreSqlValue.insert_ig_it
            sql_dict = deal_sql_dict(sql_dict)
            for page in range(page_index + 1, total_page + 1):
                sql_dict["page"] = total_page
                sql_dict["page_index"] = page
                sql_dict["list_json"] = callmodel.sql_model.list_json
                di_model_bef.lists.append(sql_dict.copy())
            result.befor_dicts.insert.append(di_model_bef)
        di_model_next = DealInsertModel()
        di_model_next.insert_pre = CoreSqlValue.insert_ig_it
        res = Selector(text=para_dicts["data"]["1_1"]['html'])
        li_list = res.xpath('//ul[@class="newsList"]/li|//ul[@class="infoList"]/li')
        for li in li_list:
            temp = info_dicts.copy()
            temp["task_tag"] = temp["task_tag_next"]
            del temp["task_tag_next"]
            article_json = dict()
            href = li.xpath('a/@href').extract_first()
            if not href:
                continue
            base_url = f'https://www.zgqingyang.gov.cn/{callmodel.sql_model.list_rawid}_1'
            url = parse.urljoin(base_url, href)
            # rawid = last path segment of the article URL (extension kept).
            rawid = url.split('/')[-1]
            temp["rawid"] = rawid
            temp["sub_db_id"] = '99677'
            article_json["url"] = url
            article_json["title"] = li.xpath('a/text()').extract_first().strip()
            article_json["pub_date"] = li.xpath('span/text()').extract_first().strip()
            temp["article_json"] = json.dumps(article_json, ensure_ascii=False)
            di_model_next.lists.append(temp)
        result.next_dicts.insert.append(di_model_next)

    return result


def policy_zgqingyangarticle_callback(callmodel: CallBackModel[PolicyArticleModel]) -> DealModel:
    """Article-stage callback for Qingyang; no extra processing is required."""
    return DealModel()


def policy_zgqingyangarticle_etl_callback(callmodel) -> EtlDealModel:
    """ETL callback for Qingyang (庆阳市) policy article pages.

    Extracts the title from the page (falling back to the queued title),
    builds the metadata and full-text rows, and records attachment info
    back on the source row.

    Raises:
        Exception: when the full-text node is missing from the page.
    """
    result = EtlDealModel()
    save_data = list()

    html = callmodel.para_dicts['data']['1_1']['html']
    article_json = json.loads(callmodel.sql_model.article_json)
    provider_url = article_json['url']
    pub_date = clean_pubdate(article_json['pub_date'])
    pub_year = pub_date[:4]
    res = Selector(text=html)

    # Prefer the on-page title; fall back to the one captured on the list page.
    title = ''.join(res.xpath('//h2[@class="title"]/text()').extract()).strip()
    if not title:
        title = article_json['title'].strip()

    fulltext_xpath = '//div[@class="conTxt"]'
    fulltext = res.xpath(fulltext_xpath).extract_first()
    if not fulltext:
        # Fail loudly so the task is retried instead of saving an empty body.
        raise Exception("zgqingyang article: fulltext node not found")

    down_date_str = time.strftime('%Y%m%d_%H%M%S', time.localtime())
    sub_db_id = '99677'
    rawid = callmodel.sql_model.rawid
    lngid = BaseLngid().GetLngid(sub_db_id, rawid)
    product = "ZGQINGYANG"
    zt_provider = "zgqingyanggovpolicy"
    data = init_data(rawid, lngid, sub_db_id, down_date_str, product, zt_provider)

    data['title'] = title
    data['provider_url'] = provider_url
    data['pub_date'] = pub_date
    data['pub_year'] = pub_year

    save_data.append({'table': 'policy_latest', 'data': data})
    full_text_data = init_full_text_data(lngid, sub_db_id, down_date_str, fulltext, pub_year)
    save_data.append({'table': 'policy_fulltext_latest', 'data': full_text_data})
    result.save_data = save_data

    # Store attachment info (or an empty JSON object) on the source row.
    file_info = get_file_info(data, res, f'({fulltext_xpath})')
    di_model_bef = DealUpdateModel()
    if file_info:
        di_model_bef.update.update({"other_dicts": json.dumps(file_info, ensure_ascii=False)})
    else:
        di_model_bef.update.update({"other_dicts": "{}"})
    di_model_bef.where.update({"rawid": callmodel.sql_model.rawid, "task_tag": callmodel.sql_model.task_tag,
                               "task_name": callmodel.sql_model.task_name})
    result.befor_dicts.update_list.append(di_model_bef)
    return result


#   甘肃省陇南市
def policy_longnanlist_callback(callmodel: CallBackModel[PolicyListModel]) -> DealModel:
    """List-page callback for Longnan (陇南市) gov policy pages.

    Reads the page count from the list HTML; on page 1 it fans out insert
    rows for every remaining page, then queues one article task per entry.
    """
    result = DealModel()
    para_dicts = callmodel.para_dicts
    task_info = callmodel.redis_all.parse_dict["1_1"].task_info
    info_dicts = {"task_name": callmodel.sql_model.task_name,
                  "task_tag": callmodel.sql_model.task_tag,
                  "task_tag_next": task_info.task_tag_next}
    if "1_1" in para_dicts["data"]:
        # Raw string fixes the invalid "\d" escape of the original pattern.
        max_count = re.findall(r'pageCount:(\d+)', para_dicts["data"]["1_1"]['html'])
        total_page = int(max_count[0]) if max_count else 1
        page_index = int(callmodel.sql_model.page_index)
        if page_index == 1:
            # Only the first page schedules the rest, to avoid duplicates.
            sql_dict = callmodel.sql_model.dict()
            di_model_bef = DealInsertModel()
            di_model_bef.insert_pre = CoreSqlValue.insert_ig_it
            sql_dict = deal_sql_dict(sql_dict)
            for page in range(page_index + 1, total_page + 1):
                sql_dict["page"] = total_page
                sql_dict["page_index"] = page
                sql_dict["list_json"] = callmodel.sql_model.list_json
                di_model_bef.lists.append(sql_dict.copy())
            result.befor_dicts.insert.append(di_model_bef)
        di_model_next = DealInsertModel()
        di_model_next.insert_pre = CoreSqlValue.insert_ig_it
        res = Selector(text=para_dicts["data"]["1_1"]['html'])
        li_list = res.xpath('//div[@class="navjz"]/ul/li|//div[@class="xxgk_nav_con"]/ul/li')
        for li in li_list:
            temp = info_dicts.copy()
            temp["task_tag"] = temp["task_tag_next"]
            del temp["task_tag_next"]
            article_json = dict()
            href = li.xpath('a/@href').extract_first()
            if not href:
                continue
            base_url = f'https://www.longnan.gov.cn/{callmodel.sql_model.list_rawid}'
            url = parse.urljoin(base_url, href)
            # rawid = file name of the article URL without its extension.
            rawid = re.findall(r'(.*?)\.', url.split('/')[-1])[0]
            temp["rawid"] = rawid
            temp["sub_db_id"] = '99679'
            article_json["url"] = url
            article_json["title"] = li.xpath('a/@title').extract_first().strip()
            article_json["pub_date"] = li.xpath('a/span/text()|span/text()').extract_first().strip()
            temp["article_json"] = json.dumps(article_json, ensure_ascii=False)
            di_model_next.lists.append(temp)
        result.next_dicts.insert.append(di_model_next)

    return result


def policy_longnanarticle_callback(callmodel: CallBackModel[PolicyArticleModel]) -> DealModel:
    """Article-stage callback for Longnan; no extra processing is required."""
    return DealModel()


def policy_longnanarticle_etl_callback(callmodel) -> EtlDealModel:
    """ETL callback for Longnan (陇南市) policy article pages.

    Extracts title and the metadata table fields (document number, index
    number, subject, keywords, written date, issuing organ), builds the
    metadata and full-text rows, and records attachment info back on the
    source row.

    Raises:
        Exception: when the full-text node is missing from the page.
    """
    result = EtlDealModel()
    save_data = list()

    html = callmodel.para_dicts['data']['1_1']['html']
    article_json = json.loads(callmodel.sql_model.article_json)
    provider_url = article_json['url']
    pub_date = clean_pubdate(article_json['pub_date'])
    pub_year = pub_date[:4]
    res = Selector(text=html)

    # Prefer the on-page title; fall back to the one captured on the list page.
    title = ''.join(res.xpath('//h1[@class="wztit"]//text()').extract()).strip()
    if not title:
        title = article_json['title'].strip()
    pub_no = ''.join(res.xpath('//table[@class="table_suoyin hidden-sm hidden-xs"]//th[text()="文"]/following::td[1]/text()').extract()).strip()
    index_no = ''.join(res.xpath('//table[@class="table_suoyin hidden-sm hidden-xs"]//th[text()="索"]/following::td[1]/text()').extract()).strip()
    subject = ''.join(res.xpath('//table[@class="table_suoyin hidden-sm hidden-xs"]//th[text()="内容分类："]/following::td[1]/text()').extract()).strip()
    subject_word = ''.join(res.xpath('//table[@class="table_suoyin hidden-sm hidden-xs"]//th[text()="关"]/following::td[1]/text()').extract()).strip()
    written_date = ''.join(res.xpath('//table[@class="table_suoyin hidden-sm hidden-xs"]//th[text()="发文日期："]/following::td[1]/text()').extract()).strip()
    organ = ''.join(res.xpath('//table[@class="table_suoyin hidden-sm hidden-xs"]//th[text()="发布机构："]/following::td[1]/text()').extract()).strip()
    # Qualify organ names that start with the bare "市" prefix.
    if organ.startswith('市'):
        organ = '陇南' + organ

    fulltext_xpath = '//div[contains(@class,"j-fontContent")]'
    fulltext = res.xpath(fulltext_xpath).extract_first()
    if not fulltext:
        # Fail loudly so the task is retried instead of saving an empty body.
        raise Exception("longnan article: fulltext node not found")

    down_date_str = time.strftime('%Y%m%d_%H%M%S', time.localtime())
    sub_db_id = '99679'
    rawid = callmodel.sql_model.rawid
    lngid = BaseLngid().GetLngid(sub_db_id, rawid)
    product = "LONGNAN"
    zt_provider = "longnangovpolicy"
    data = init_data(rawid, lngid, sub_db_id, down_date_str, product, zt_provider)

    data['title'] = title
    data['provider_url'] = provider_url
    data['pub_date'] = pub_date
    data['pub_year'] = pub_year
    data['pub_no'] = pub_no
    data['organ'] = organ
    data['index_no'] = index_no
    data['written_date'] = clean_pubdate(written_date)
    data['subject'] = subject
    data['subject_word'] = subject_word

    save_data.append({'table': 'policy_latest', 'data': data})
    full_text_data = init_full_text_data(lngid, sub_db_id, down_date_str, fulltext, pub_year)
    save_data.append({'table': 'policy_fulltext_latest', 'data': full_text_data})
    result.save_data = save_data

    # Store attachment info (or an empty JSON object) on the source row.
    file_info = get_file_info(data, res, f'({fulltext_xpath})')
    di_model_bef = DealUpdateModel()
    if file_info:
        di_model_bef.update.update({"other_dicts": json.dumps(file_info, ensure_ascii=False)})
    else:
        di_model_bef.update.update({"other_dicts": "{}"})
    di_model_bef.where.update({"rawid": callmodel.sql_model.rawid, "task_tag": callmodel.sql_model.task_tag,
                               "task_name": callmodel.sql_model.task_name})
    result.befor_dicts.update_list.append(di_model_bef)
    return result


#   甘肃省临夏回族自治州
def policy_linxialist_callback(callmodel: CallBackModel[PolicyListModel]) -> DealModel:
    """List-page callback for Linxia (临夏回族自治州) gov policy API.

    Derives the total page count from escaped count/rows attributes in the
    JSON payload; on page 1 it fans out insert rows for every remaining
    page, then queues one article task per list entry.
    """
    result = DealModel()
    para_dicts = callmodel.para_dicts
    task_info = callmodel.redis_all.parse_dict["1_1"].task_info
    info_dicts = {"task_name": callmodel.sql_model.task_name,
                  "task_tag": callmodel.sql_model.task_tag,
                  "task_tag_next": task_info.task_tag_next}
    if "1_1" in para_dicts["data"]:
        # Attributes arrive JSON-escaped (count=\" ...), hence the doubled
        # backslash; raw strings fix the invalid "\d" escapes of the
        # original patterns while matching the same text.
        max_count = re.findall(r'count=\\"(\d+)', para_dicts["data"]["1_1"]['html'])
        num = re.findall(r'rows=\\"(\d+)', para_dicts["data"]["1_1"]['html'])
        num = int(num[0]) if num else 10
        max_count = int(max_count[0]) if max_count else 1
        total_page = math.ceil(max_count / num)
        page_index = int(callmodel.sql_model.page_index)
        if page_index == 1:
            # Only the first page schedules the rest, to avoid duplicates.
            sql_dict = callmodel.sql_model.dict()
            di_model_bef = DealInsertModel()
            di_model_bef.insert_pre = CoreSqlValue.insert_ig_it
            sql_dict = deal_sql_dict(sql_dict)
            for page in range(page_index + 1, total_page + 1):
                sql_dict["page"] = total_page
                sql_dict["page_index"] = page
                sql_dict["list_json"] = callmodel.sql_model.list_json
                di_model_bef.lists.append(sql_dict.copy())
            result.befor_dicts.insert.append(di_model_bef)
        di_model_next = DealInsertModel()
        di_model_next.insert_pre = CoreSqlValue.insert_ig_it
        html_json = json.loads(para_dicts["data"]["1_1"]['html'])
        res = Selector(text=html_json['data']['html'])
        li_list = res.xpath('//div[@class="page-content"]//ul/li|//div[@class="page-content"]//table/tbody/tr')
        for li in li_list:
            temp = info_dicts.copy()
            temp["task_tag"] = temp["task_tag_next"]
            del temp["task_tag_next"]
            article_json = dict()
            href = li.xpath('.//a/@href').extract_first()
            if not href:
                continue
            base_url = f'https://www.linxia.gov.cn/api-gateway/jpaas-publish-server/front/page/build/unit'
            url = parse.urljoin(base_url, href)
            if 'htm' not in url:
                continue
            # rawid = file name of the article URL without its extension.
            rawid = re.findall(r'(.*?)\.', url.split('/')[-1])[0]
            temp["rawid"] = rawid
            temp["sub_db_id"] = '99680'
            article_json["url"] = url
            article_json["title"] = ''.join(li.xpath('.//a//text()').extract()).strip()
            article_json["pub_date"] = li.xpath('td[@class="table_fbrq"]/text()|span/text()').extract_first().strip()
            temp["article_json"] = json.dumps(article_json, ensure_ascii=False)
            di_model_next.lists.append(temp)
        result.next_dicts.insert.append(di_model_next)

    return result


def policy_linxiaarticle_callback(callmodel: CallBackModel[PolicyArticleModel]) -> DealModel:
    """Article-stage callback for Linxia; no extra processing is required."""
    return DealModel()


def policy_linxiaarticle_etl_callback(callmodel) -> EtlDealModel:
    """ETL callback for Linxia (临夏回族自治州) policy article pages.

    Extracts the title (page heading, then meta tag, then queued title) and
    the metadata table fields, builds the metadata and full-text rows, and
    records attachment info back on the source row.

    Raises:
        Exception: when the full-text node is missing from the page.
    """
    result = EtlDealModel()
    save_data = list()

    html = callmodel.para_dicts['data']['1_1']['html']
    article_json = json.loads(callmodel.sql_model.article_json)
    provider_url = article_json['url']
    pub_date = clean_pubdate(article_json['pub_date'])
    pub_year = pub_date[:4]
    res = Selector(text=html)

    # Title fallback chain: page heading -> meta tag -> list-page title.
    title = ''.join(res.xpath('//h3[@class="text-tag canRead"]//text()').extract()).strip()
    if not title:
        title = ''.join(res.xpath('//meta[@name="ArticleTitle"][1]/@content').extract()).strip()
    if not title:
        title = article_json['title'].strip()

    pub_no = ''.join(res.xpath('//table[@class="xxgk-table"]//td[contains(text(),"发文字号")]/following::td[1]/text()').extract()).strip()
    index_no = ''.join(res.xpath('//table[@class="xxgk-table"]//td[contains(text(),"索") and contains(text(),"号")]/following::td[1]/text()').extract()).strip()
    written_date = ''.join(res.xpath('//table[@class="xxgk-table"]//td[contains(text(),"成文日期")]/following::td[1]/text()').extract()).strip()
    legal_status = ''.join(res.xpath('//table[@class="xxgk-table"]//td[contains(text(),"有效性")]/following::td[1]/text()').extract()).strip()
    subject = ''.join(res.xpath('//table[@class="xxgk-table"]//td[contains(text(),"主题分类")]/following::td[1]/text()').extract()).strip()
    organ = ''.join(res.xpath('//table[@class="xxgk-table"]//td[contains(text(),"制发机构")]/following::td[1]/text()').extract()).strip()
    # Qualify organ names that start with the bare "州" prefix.
    if organ.startswith('州'):
        organ = '临夏回族自治' + organ

    fulltext_xpath = '//div[@id="zoom"]'
    fulltext = res.xpath(fulltext_xpath).extract_first()
    if not fulltext:
        # Fail loudly so the task is retried instead of saving an empty body.
        raise Exception("linxia article: fulltext node not found")

    down_date_str = time.strftime('%Y%m%d_%H%M%S', time.localtime())
    sub_db_id = '99680'
    rawid = callmodel.sql_model.rawid
    lngid = BaseLngid().GetLngid(sub_db_id, rawid)
    product = "LINXIA"
    zt_provider = "linxiagovpolicy"
    data = init_data(rawid, lngid, sub_db_id, down_date_str, product, zt_provider)

    data['title'] = title
    data['provider_url'] = provider_url
    data['pub_date'] = pub_date
    data['pub_year'] = pub_year
    data['pub_no'] = pub_no
    data['organ'] = organ
    data['index_no'] = index_no
    data['written_date'] = clean_pubdate(written_date)
    data['subject'] = subject
    data['legal_status'] = legal_status

    save_data.append({'table': 'policy_latest', 'data': data})
    full_text_data = init_full_text_data(lngid, sub_db_id, down_date_str, fulltext, pub_year)
    save_data.append({'table': 'policy_fulltext_latest', 'data': full_text_data})
    result.save_data = save_data

    # Store attachment info (or an empty JSON object) on the source row.
    file_info = get_file_info(data, res, f'({fulltext_xpath})')
    di_model_bef = DealUpdateModel()
    if file_info:
        di_model_bef.update.update({"other_dicts": json.dumps(file_info, ensure_ascii=False)})
    else:
        di_model_bef.update.update({"other_dicts": "{}"})
    di_model_bef.where.update({"rawid": callmodel.sql_model.rawid, "task_tag": callmodel.sql_model.task_tag,
                               "task_name": callmodel.sql_model.task_name})
    result.befor_dicts.update_list.append(di_model_bef)
    return result


#   甘肃省甘南藏族自治州
def policy_gnzrmzflist_callback(callmodel: CallBackModel[PolicyListModel]) -> DealModel:
    """List-page callback for Gannan (甘南藏族自治州) gov policy pages.

    This site paginates with descending "/N.htm" suffixes, so page 0 reads
    the highest page number from the pager and schedules the remaining
    pages with computed page_info paths; each list entry then becomes one
    article task.
    """
    result = DealModel()
    para_dicts = callmodel.para_dicts
    task_info = callmodel.redis_all.parse_dict["1_1"].task_info
    info_dicts = {"task_name": callmodel.sql_model.task_name,
                  "task_tag": callmodel.sql_model.task_tag,
                  "task_tag_next": task_info.task_tag_next}
    if "1_1" in para_dicts["data"]:
        # Raw string fixes the invalid "\d" escape of the original pattern.
        max_count = re.findall(r"p_next p_fun.*?/(\d+)", para_dicts["data"]["1_1"]['html'])
        max_count = int(max_count[0]) if max_count else 1
        total_page = max_count + 1
        page_index = int(callmodel.sql_model.page_index)
        if page_index == 0:
            # Only the entry page (index 0) schedules the rest.
            sql_dict = callmodel.sql_model.dict()
            di_model_bef = DealInsertModel()
            di_model_bef.insert_pre = CoreSqlValue.insert_ig_it
            sql_dict = deal_sql_dict(sql_dict)
            for page in range(page_index + 1, total_page):
                sql_dict["page"] = total_page
                sql_dict["page_index"] = page
                # Page numbers in the URL count down from the highest page.
                dic = {"page_info": f"/{total_page - page}.htm"}
                sql_dict["list_json"] = json.dumps(dic, ensure_ascii=False)
                di_model_bef.lists.append(sql_dict.copy())
            result.befor_dicts.insert.append(di_model_bef)
        di_model_next = DealInsertModel()
        di_model_next.insert_pre = CoreSqlValue.insert_ig_it
        res = Selector(text=para_dicts["data"]["1_1"]['html'])
        li_list = res.xpath('//div[contains(@class,"govnewslist")]/ul/li|//ul[@class="ul-tbody"]/li')
        for li in li_list:
            temp = info_dicts.copy()
            temp["task_tag"] = temp["task_tag_next"]
            del temp["task_tag_next"]
            article_json = dict()
            href = li.xpath('a/@href').extract_first()
            if not href:
                continue
            # The entry page and the numbered pages resolve hrefs against
            # different base paths.
            if page_index == 0:
                base_url = f'http://www.gnzrmzf.gov.cn/{callmodel.sql_model.list_rawid}.htm'
            else:
                base_url = f'http://www.gnzrmzf.gov.cn/{callmodel.sql_model.list_rawid}/1.htm'
            url = parse.urljoin(base_url, href)
            if 'htm' not in url:
                continue
            # rawid = file name of the article URL without its extension.
            rawid = re.findall(r'(.*?)\.', url.split('/')[-1])[0]
            temp["rawid"] = rawid
            temp["sub_db_id"] = '99682'
            article_json["url"] = url
            article_json["title"] = li.xpath('a/@title').extract_first().strip()
            article_json["pub_date"] = li.xpath('a/div[@class="r"]/text()|span/text()').extract_first().strip()
            temp["article_json"] = json.dumps(article_json, ensure_ascii=False)
            di_model_next.lists.append(temp)
        result.next_dicts.insert.append(di_model_next)

    return result


def policy_gnzrmzfarticle_callback(callmodel: CallBackModel[PolicyArticleModel]) -> DealModel:
    """Article-stage callback for Gannan; no extra processing is required."""
    return DealModel()


def policy_gnzrmzfarticle_etl_callback(callmodel) -> EtlDealModel:
    """ETL callback for Gannan (甘南藏族自治州) policy article pages.

    Extracts the title (metadata block, then page heading, then queued
    title) and the metadata fields, builds the metadata and full-text rows,
    and records attachment info back on the source row.

    Raises:
        Exception: when the full-text node is missing from the page.
    """
    result = EtlDealModel()
    save_data = list()

    html = callmodel.para_dicts['data']['1_1']['html']
    article_json = json.loads(callmodel.sql_model.article_json)
    provider_url = article_json['url']
    pub_date = clean_pubdate(article_json['pub_date'])
    pub_year = pub_date[:4]
    res = Selector(text=html)

    # Title fallback chain: metadata block -> page heading -> list-page title.
    title = ''.join(res.xpath('//div[@class="contenttitle"]//font[contains(text(),"题：")]/span/text()').extract()).strip()
    if not title:
        title = ''.join(res.xpath('//h1[@class="tit"]//text()').extract()).strip()
    if not title:
        title = article_json['title'].strip()
    pub_no = ''.join(res.xpath('//div[@class="contenttitle"]//font[contains(text(),"发文字号：")]/span/text()').extract()).strip()
    index_no = ''.join(res.xpath('//div[@class="contenttitle"]//font[contains(text(),"索 引 号：")]/span/text()').extract()).strip()
    written_date = ''.join(res.xpath('//div[@class="contenttitle"]//font[contains(text(),"成文日期：")]/span/text()').extract()).strip()
    organ = ''.join(res.xpath('//div[@class="contenttitle"]//font[contains(text(),"发文机关：")]/span/text()').extract()).strip()
    # Qualify organ names that start with the bare "州" prefix.
    if organ.startswith('州'):
        organ = '甘南藏族自治' + organ

    fulltext_xpath = '//div[contains(@id,"vsb_content")]'
    fulltext = res.xpath(fulltext_xpath).extract_first()
    if not fulltext:
        # Fail loudly so the task is retried instead of saving an empty body.
        raise Exception("gnzrmzf article: fulltext node not found")

    down_date_str = time.strftime('%Y%m%d_%H%M%S', time.localtime())
    sub_db_id = '99682'
    rawid = callmodel.sql_model.rawid
    lngid = BaseLngid().GetLngid(sub_db_id, rawid)
    product = "GNZRMZF"
    zt_provider = "gnzrmzfgovpolicy"
    data = init_data(rawid, lngid, sub_db_id, down_date_str, product, zt_provider)

    data['title'] = title
    data['provider_url'] = provider_url
    data['pub_date'] = pub_date
    data['pub_year'] = pub_year
    data['pub_no'] = pub_no
    data['organ'] = organ
    data['index_no'] = index_no
    data['written_date'] = clean_pubdate(written_date)

    save_data.append({'table': 'policy_latest', 'data': data})
    full_text_data = init_full_text_data(lngid, sub_db_id, down_date_str, fulltext, pub_year)
    save_data.append({'table': 'policy_fulltext_latest', 'data': full_text_data})
    result.save_data = save_data

    # Store attachment info (or an empty JSON object) on the source row.
    file_info = get_file_info(data, res, f'({fulltext_xpath})')
    di_model_bef = DealUpdateModel()
    if file_info:
        di_model_bef.update.update({"other_dicts": json.dumps(file_info, ensure_ascii=False)})
    else:
        di_model_bef.update.update({"other_dicts": "{}"})
    di_model_bef.where.update({"rawid": callmodel.sql_model.rawid, "task_tag": callmodel.sql_model.task_tag,
                               "task_name": callmodel.sql_model.task_name})
    result.befor_dicts.update_list.append(di_model_bef)
    return result


#   Dingxi City, Gansu Province (甘肃省定西市)
def policy_dingxilist_callback(callmodel: CallBackModel[PolicyListModel]) -> DealModel:
    """List-page callback for Dingxi policy lists (sub_db_id 99678).

    On the first list page the total record count is read from the XML-ish
    payload, the page count is derived (25 records per page, queued three
    pages per task row) and the remaining list-page tasks are inserted.
    For every page, article links are extracted from the ``<record>``
    elements and one next-stage task is queued per article.

    :param callmodel: platform callback model carrying the fetched HTML
        and the originating SQL row.
    :return: DealModel with paging tasks (``befor_dicts``) and article
        tasks (``next_dicts``) to insert.
    """
    result = DealModel()
    para_dicts = callmodel.para_dicts
    task_info = callmodel.redis_all.parse_dict["1_1"].task_info
    info_dicts = {"task_name": callmodel.sql_model.task_name,
                  "task_tag": callmodel.sql_model.task_tag,
                  "task_tag_next": task_info.task_tag_next}
    if "1_1" in para_dicts["data"]:
        max_count = re.findall(r'<totalrecord>(\d+)</totalrecord>', para_dicts["data"]["1_1"]['html'])
        max_count = int(max_count[0]) if max_count else 1
        total_page = math.ceil(max_count / 25)
        page_index = int(callmodel.sql_model.page_index)
        if page_index == 1:
            # Only the first page fans out the remaining paging tasks.
            sql_dict = callmodel.sql_model.dict()
            di_model_bef = DealInsertModel()
            di_model_bef.insert_pre = CoreSqlValue.insert_ig_it
            sql_dict = deal_sql_dict(sql_dict)
            list_json = json.loads(callmodel.sql_model.list_json)
            for page in range(1, total_page + 1, 3):
                sql_dict["page"] = total_page
                sql_dict["page_index"] = page
                # Each queued task covers three 25-record pages: [start, end].
                start = (page - 1) * 25 + 1
                end = min((page + 2) * 25, max_count)
                dic = {"start": start, "end": end, "page_info": list_json["page_info"]}
                sql_dict["list_json"] = json.dumps(dic, ensure_ascii=False)
                di_model_bef.lists.append(sql_dict.copy())
            result.befor_dicts.insert.append(di_model_bef)
        di_model_next = DealInsertModel()
        di_model_next.insert_pre = CoreSqlValue.insert_ig_it

        res = Selector(text=para_dicts["data"]["1_1"]['html'])
        li_list = res.xpath('//record')
        for li in li_list:
            temp = info_dicts.copy()
            temp["task_tag"] = temp["task_tag_next"]
            del temp["task_tag_next"]
            article_json = dict()
            href = li.xpath('.//a/@href').extract_first()
            if not href:
                # Malformed <record> without a link; skip instead of crashing
                # on the membership test below.
                continue
            base_url = 'http://www.dingxi.gov.cn'
            url = href if 'http' in href else base_url + href
            if 'htm' not in url:
                continue
            # rawid is the file name without its extension.
            rawid = re.findall(r'(.*?)\.', url.split('/')[-1])[0]
            temp["rawid"] = rawid
            temp["sub_db_id"] = '99678'
            article_json["url"] = url
            # Column layout differs per list channel (list_rawid).
            if '15863' == callmodel.sql_model.list_rawid or '15864' == callmodel.sql_model.list_rawid:
                article_json["title"] = li.xpath('td[2]/a/text()').extract_first().strip()
                pub_date = li.xpath('td[5]/text()').extract_first().strip()
                article_json["pub_no"] = ''.join(li.xpath('td[3]/text()').extract()).strip()
                article_json["written_date"] = ''.join(li.xpath('td[4]/text()').extract()).strip()
            elif '15868' == callmodel.sql_model.list_rawid:
                article_json["title"] = li.xpath('td[2]/a/text()').extract_first().strip()
                pub_date = li.xpath('td[3]/text()').extract_first().strip()
            else:
                article_json["title"] = li.xpath('a/@title').extract_first().strip()
                pub_date = li.xpath('span[@class="bt-data-time"]/text()|.//a/span/text()').extract_first().strip()
            article_json["pub_date"] = pub_date.strip()
            temp["article_json"] = json.dumps(article_json, ensure_ascii=False)
            di_model_next.lists.append(temp)

        result.next_dicts.insert.append(di_model_next)

    return result


def policy_dingxiarticle_callback(callmodel: CallBackModel[PolicyArticleModel]) -> DealModel:
    """Article-stage callback for Dingxi; no extra task fan-out is needed."""
    return DealModel()


def policy_dingxiarticle_etl_callback(callmodel) -> EtlDealModel:
    """ETL callback for Dingxi policy articles (sub_db_id 99678).

    Parses the downloaded article page, extracts metadata and the full
    text, queues rows for ``policy_latest`` / ``policy_fulltext_latest``
    and writes attachment info back onto the originating task row.

    :raises Exception: when the full-text container is missing from the HTML.
    """
    result = EtlDealModel()
    save_data = list()

    html = callmodel.para_dicts['data']['1_1']['html']
    article_json = json.loads(callmodel.sql_model.article_json)
    provider_url = article_json['url']
    pub_date = clean_pubdate(article_json['pub_date'])  # normalized once here
    pub_year = pub_date[:4]
    res = Selector(text=html)

    # Prefer the on-page title; fall back to the title captured at list time.
    title = ''.join(res.xpath('//td[@class="title"]//text()|//div[@id="title"]//text()').extract()).strip()
    if not title:
        title = article_json['title'].strip()
    pub_no = article_json.get('pub_no', '')
    written_date = article_json.get('written_date', '')

    fulltext_xpath = '//div[@id="zoom"]'
    fulltext = res.xpath(fulltext_xpath).extract_first()
    if not fulltext:
        raise Exception(f'fulltext not found: {provider_url}')

    down_date_str = time.strftime('%Y%m%d_%H%M%S', time.localtime())
    sub_db_id = '99678'
    rawid = callmodel.sql_model.rawid
    lngid = BaseLngid().GetLngid(sub_db_id, rawid)
    product = "DINGXI"
    zt_provider = "dingxigovpolicy"
    data = init_data(rawid, lngid, sub_db_id, down_date_str, product, zt_provider)
    print(lngid)

    data['title'] = title
    data['provider_url'] = provider_url
    data['pub_date'] = pub_date  # already cleaned above
    data['pub_year'] = pub_year
    data['pub_no'] = pub_no
    data['written_date'] = clean_pubdate(written_date)

    save_data.append({'table': 'policy_latest', 'data': data})
    full_text_data = init_full_text_data(lngid, sub_db_id, down_date_str, fulltext, pub_year)
    save_data.append({'table': 'policy_fulltext_latest', 'data': full_text_data})
    result.save_data = save_data

    # Record attachment info (if any) back onto the source row.
    file_info = get_file_info(data, res, f'({fulltext_xpath})')
    di_model_bef = DealUpdateModel()
    if file_info:
        di_model_bef.update.update({"other_dicts": json.dumps(file_info, ensure_ascii=False)})
    else:
        di_model_bef.update.update({"other_dicts": "{}"})
    di_model_bef.where.update({"rawid": callmodel.sql_model.rawid, "task_tag": callmodel.sql_model.task_tag,
                               "task_name": callmodel.sql_model.task_name})
    result.befor_dicts.update_list.append(di_model_bef)
    return result


def policy_fzgggansuarticle_etl_callback(callmodel) -> EtlDealModel:
    """ETL callback for Gansu FZGG policy articles (sub_db_id 99658).

    Parses the downloaded article page, extracts title/full text, queues
    rows for ``policy_latest`` / ``policy_fulltext_latest`` and writes
    attachment info back onto the originating task row.

    :raises Exception: when the full-text container is missing from the HTML.
    """
    result = EtlDealModel()
    save_data = list()

    html = callmodel.para_dicts['data']['1_1']['html']
    article_json = json.loads(callmodel.sql_model.article_json)
    provider_url = article_json['url']
    pub_date = clean_pubdate(article_json['pub_date'])  # normalized once here
    pub_year = pub_date[:4]
    res = Selector(text=html)

    # Prefer the on-page title; fall back to the title captured at list time.
    title = ''.join(res.xpath('//div[@class="article"]/h1//text()|//div[@class="content"]/h1//text()').extract()).strip()
    if not title:
        title = article_json['title'].strip()

    fulltext_xpath = '//div[@id="content"]'
    fulltext = res.xpath(fulltext_xpath).extract_first()
    if not fulltext:
        raise Exception(f'fulltext not found: {provider_url}')

    down_date_str = time.strftime('%Y%m%d_%H%M%S', time.localtime())
    sub_db_id = '99658'
    rawid = callmodel.sql_model.rawid
    lngid = BaseLngid().GetLngid(sub_db_id, rawid)
    product = "FZGGGANSU"
    zt_provider = "fzgggansugovpolicy"
    data = init_data(rawid, lngid, sub_db_id, down_date_str, product, zt_provider)
    print(lngid)

    data['title'] = title
    data['provider_url'] = provider_url
    data['pub_date'] = pub_date  # already cleaned above
    data['pub_year'] = pub_year

    save_data.append({'table': 'policy_latest', 'data': data})
    full_text_data = init_full_text_data(lngid, sub_db_id, down_date_str, fulltext, pub_year)
    save_data.append({'table': 'policy_fulltext_latest', 'data': full_text_data})
    result.save_data = save_data

    # Record attachment info (if any) back onto the source row.
    file_info = get_file_info(data, res, f'({fulltext_xpath})')
    di_model_bef = DealUpdateModel()
    if file_info:
        di_model_bef.update.update({"other_dicts": json.dumps(file_info, ensure_ascii=False)})
    else:
        di_model_bef.update.update({"other_dicts": "{}"})
    di_model_bef.where.update({"rawid": callmodel.sql_model.rawid, "task_tag": callmodel.sql_model.task_tag,
                               "task_name": callmodel.sql_model.task_name})
    result.befor_dicts.update_list.append(di_model_bef)
    return result


def policy_gxtgansuarticle_etl_callback(callmodel) -> EtlDealModel:
    """ETL callback for Gansu GXT policy articles (sub_db_id 99659).

    Uses the list-time title as-is (no on-page title extraction here),
    extracts the full text, queues rows for ``policy_latest`` /
    ``policy_fulltext_latest`` and writes attachment info back onto the
    originating task row.

    :raises Exception: when the full-text container is missing from the HTML.
    """
    result = EtlDealModel()
    save_data = list()

    html = callmodel.para_dicts['data']['1_1']['html']
    article_json = json.loads(callmodel.sql_model.article_json)
    title = article_json['title']  # list-time title; no page-level override
    provider_url = article_json['url']
    pub_date = clean_pubdate(article_json['pub_date'])  # normalized once here
    pub_year = pub_date[:4]
    res = Selector(text=html)

    fulltext_xpath = '//div[@class="nr-03"]|//div[@id="zoom"]'
    fulltext = res.xpath(fulltext_xpath).extract_first()
    if not fulltext:
        raise Exception(f'fulltext not found: {provider_url}')

    down_date_str = time.strftime('%Y%m%d_%H%M%S', time.localtime())
    sub_db_id = '99659'
    rawid = callmodel.sql_model.rawid
    lngid = BaseLngid().GetLngid(sub_db_id, rawid)
    product = "GXTGANSU"
    zt_provider = "gxtgansugovpolicy"
    data = init_data(rawid, lngid, sub_db_id, down_date_str, product, zt_provider)
    print(lngid)

    data['title'] = title
    data['provider_url'] = provider_url
    data['pub_date'] = pub_date  # already cleaned above
    data['pub_year'] = pub_year

    save_data.append({'table': 'policy_latest', 'data': data})
    full_text_data = init_full_text_data(lngid, sub_db_id, down_date_str, fulltext, pub_year)
    save_data.append({'table': 'policy_fulltext_latest', 'data': full_text_data})
    result.save_data = save_data

    # Record attachment info (if any) back onto the source row.
    file_info = get_file_info(data, res, f'({fulltext_xpath})')
    di_model_bef = DealUpdateModel()
    if file_info:
        di_model_bef.update.update({"other_dicts": json.dumps(file_info, ensure_ascii=False)})
    else:
        di_model_bef.update.update({"other_dicts": "{}"})
    di_model_bef.where.update({"rawid": callmodel.sql_model.rawid, "task_tag": callmodel.sql_model.task_tag,
                               "task_name": callmodel.sql_model.task_name})
    result.befor_dicts.update_list.append(di_model_bef)
    return result


def policy_kjtgansuarticle_etl_callback(callmodel) -> EtlDealModel:
    """ETL callback for Gansu KJT policy articles (sub_db_id 99660).

    Extracts title, document number, issuing organ and written date from
    the article page's info box, locates the full text (with a fallback
    container), queues rows for ``policy_latest`` /
    ``policy_fulltext_latest`` and writes attachment info back onto the
    originating task row.

    :raises Exception: when no full-text container can be located.
    """
    result = EtlDealModel()
    save_data = list()

    html = callmodel.para_dicts['data']['1_1']['html']
    article_json = json.loads(callmodel.sql_model.article_json)
    provider_url = article_json['url']
    pub_date = clean_pubdate(article_json['pub_date'])  # normalized once here
    pub_year = pub_date[:4]
    res = Selector(text=html)

    # Prefer the on-page title; fall back to the title captured at list time.
    title = ''.join(res.xpath('//div[@class="am-message-title"]//text()').extract()).strip()
    if not title:
        title = article_json['title'].strip()
    pub_no = ''.join(res.xpath('//div[@class="infoBox"]//th[contains(text(),"文件编号")]/following::td[1]/text()').extract()).strip()
    organ = ''.join(res.xpath('//div[@class="infoBox"]//th[contains(text(),"发布单位")]/following::td[1]/text()').extract()).strip()
    written_date = ''.join(res.xpath('//div[@class="infoBox"]//th[contains(text(),"生成日期")]/following::td[1]/text()').extract()).strip()

    # Two page layouts exist: try the common containers, then the fallback.
    fulltext_xpath = '//div[@id="zoom"]|//div[@id="newbody"]'
    fulltext = res.xpath(fulltext_xpath).extract_first()
    if not fulltext:
        fulltext_xpath = '//div[@id="message"]'
        fulltext = res.xpath(fulltext_xpath).extract_first()
    if not fulltext:
        raise Exception(f'fulltext not found: {provider_url}')

    down_date_str = time.strftime('%Y%m%d_%H%M%S', time.localtime())
    sub_db_id = '99660'
    rawid = callmodel.sql_model.rawid
    lngid = BaseLngid().GetLngid(sub_db_id, rawid)
    product = "KJTGANSU"
    zt_provider = "kjtgansugovpolicy"
    data = init_data(rawid, lngid, sub_db_id, down_date_str, product, zt_provider)
    print(lngid)

    data['title'] = title
    data['provider_url'] = provider_url
    data['pub_date'] = pub_date  # already cleaned above
    data['pub_year'] = pub_year
    data['pub_no'] = pub_no
    data['organ'] = organ
    data['written_date'] = clean_pubdate(written_date)

    save_data.append({'table': 'policy_latest', 'data': data})
    full_text_data = init_full_text_data(lngid, sub_db_id, down_date_str, fulltext, pub_year)
    save_data.append({'table': 'policy_fulltext_latest', 'data': full_text_data})
    result.save_data = save_data

    # Record attachment info (if any) back onto the source row.
    file_info = get_file_info(data, res, f'({fulltext_xpath})')
    di_model_bef = DealUpdateModel()
    if file_info:
        di_model_bef.update.update({"other_dicts": json.dumps(file_info, ensure_ascii=False)})
    else:
        di_model_bef.update.update({"other_dicts": "{}"})
    di_model_bef.where.update({"rawid": callmodel.sql_model.rawid, "task_tag": callmodel.sql_model.task_tag,
                               "task_name": callmodel.sql_model.task_name})
    result.befor_dicts.update_list.append(di_model_bef)
    return result


def policy_jytgansuarticle_etl_callback(callmodel) -> EtlDealModel:
    """ETL callback for Gansu JYT policy articles (sub_db_id 99661).

    Parses the downloaded article page, extracts title/full text, queues
    rows for ``policy_latest`` / ``policy_fulltext_latest`` and writes
    attachment info back onto the originating task row.

    :raises Exception: when the full-text container is missing from the HTML.
    """
    result = EtlDealModel()
    save_data = list()

    html = callmodel.para_dicts['data']['1_1']['html']
    article_json = json.loads(callmodel.sql_model.article_json)
    provider_url = article_json['url']
    pub_date = clean_pubdate(article_json['pub_date'])  # normalized once here
    pub_year = pub_date[:4]
    res = Selector(text=html)

    # Prefer the on-page title; fall back to the title captured at list time.
    title = ''.join(res.xpath('//div[@class="text_box d float"]/h1//text()|//div[@class="content"]/h1//text()').extract()).strip()
    if not title:
        title = article_json['title'].strip()

    fulltext_xpath = '//div[@id="zoom"]|//div[@class="content_text"]'
    fulltext = res.xpath(fulltext_xpath).extract_first()
    if not fulltext:
        raise Exception(f'fulltext not found: {provider_url}')

    down_date_str = time.strftime('%Y%m%d_%H%M%S', time.localtime())
    sub_db_id = '99661'
    rawid = callmodel.sql_model.rawid
    lngid = BaseLngid().GetLngid(sub_db_id, rawid)
    product = "JYTGANSU"
    zt_provider = "jytgansugovpolicy"
    data = init_data(rawid, lngid, sub_db_id, down_date_str, product, zt_provider)
    print(lngid)

    data['title'] = title
    data['provider_url'] = provider_url
    data['pub_date'] = pub_date  # already cleaned above
    data['pub_year'] = pub_year

    save_data.append({'table': 'policy_latest', 'data': data})
    full_text_data = init_full_text_data(lngid, sub_db_id, down_date_str, fulltext, pub_year)
    save_data.append({'table': 'policy_fulltext_latest', 'data': full_text_data})
    result.save_data = save_data

    # Record attachment info (if any) back onto the source row.
    file_info = get_file_info(data, res, f'({fulltext_xpath})')
    di_model_bef = DealUpdateModel()
    if file_info:
        di_model_bef.update.update({"other_dicts": json.dumps(file_info, ensure_ascii=False)})
    else:
        di_model_bef.update.update({"other_dicts": "{}"})
    di_model_bef.where.update({"rawid": callmodel.sql_model.rawid, "task_tag": callmodel.sql_model.task_tag,
                               "task_name": callmodel.sql_model.task_name})
    result.befor_dicts.update_list.append(di_model_bef)
    return result


def policy_mztgansuarticle_etl_callback(callmodel) -> EtlDealModel:
    """ETL callback for Gansu MZT policy articles (sub_db_id 99662).

    Parses the downloaded article page, extracts title/full text, queues
    rows for ``policy_latest`` / ``policy_fulltext_latest`` and writes
    attachment info back onto the originating task row.

    :raises Exception: when the full-text container is missing from the HTML.
    """
    result = EtlDealModel()
    save_data = list()

    html = callmodel.para_dicts['data']['1_1']['html']
    article_json = json.loads(callmodel.sql_model.article_json)
    provider_url = article_json['url']
    pub_date = clean_pubdate(article_json['pub_date'])  # normalized once here
    pub_year = pub_date[:4]
    res = Selector(text=html)

    # Prefer the on-page title; fall back to the title captured at list time.
    title = ''.join(res.xpath('//div[@class="m_info_detail_title"]/h2//text()').extract()).strip()
    if not title:
        title = article_json['title'].strip()

    fulltext_xpath = '//div[@class="m_info_detail_content"]'
    fulltext = res.xpath(fulltext_xpath).extract_first()
    if not fulltext:
        raise Exception(f'fulltext not found: {provider_url}')

    down_date_str = time.strftime('%Y%m%d_%H%M%S', time.localtime())
    sub_db_id = '99662'
    rawid = callmodel.sql_model.rawid
    lngid = BaseLngid().GetLngid(sub_db_id, rawid)
    product = "MZTGANSU"
    zt_provider = "mztgansugovpolicy"
    data = init_data(rawid, lngid, sub_db_id, down_date_str, product, zt_provider)
    print(lngid)

    data['title'] = title
    data['provider_url'] = provider_url
    data['pub_date'] = pub_date  # already cleaned above
    data['pub_year'] = pub_year

    save_data.append({'table': 'policy_latest', 'data': data})
    full_text_data = init_full_text_data(lngid, sub_db_id, down_date_str, fulltext, pub_year)
    save_data.append({'table': 'policy_fulltext_latest', 'data': full_text_data})
    result.save_data = save_data

    # Record attachment info (if any) back onto the source row.
    file_info = get_file_info(data, res, f'({fulltext_xpath})')
    di_model_bef = DealUpdateModel()
    if file_info:
        di_model_bef.update.update({"other_dicts": json.dumps(file_info, ensure_ascii=False)})
    else:
        di_model_bef.update.update({"other_dicts": "{}"})
    di_model_bef.where.update({"rawid": callmodel.sql_model.rawid, "task_tag": callmodel.sql_model.task_tag,
                               "task_name": callmodel.sql_model.task_name})
    result.befor_dicts.update_list.append(di_model_bef)
    return result


def policy_cztgansuarticle_etl_callback(callmodel) -> EtlDealModel:
    """ETL callback for Gansu CZT policy articles (sub_db_id 99663).

    Parses the downloaded article page, extracts title/full text, queues
    rows for ``policy_latest`` / ``policy_fulltext_latest`` and writes
    attachment info back onto the originating task row.

    :raises Exception: when the full-text container is missing from the HTML.
    """
    result = EtlDealModel()
    save_data = list()

    html = callmodel.para_dicts['data']['1_1']['html']
    article_json = json.loads(callmodel.sql_model.article_json)
    provider_url = article_json['url']
    pub_date = clean_pubdate(article_json['pub_date'])  # normalized once here
    pub_year = pub_date[:4]
    res = Selector(text=html)

    # Prefer the on-page title; fall back to the title captured at list time.
    title = ''.join(res.xpath('//div[@class="con_bt"]//text()').extract()).strip()
    if not title:
        title = article_json['title'].strip()

    fulltext_xpath = '//div[@class="con_zw"]|//div[@id="zoom"]'
    fulltext = res.xpath(fulltext_xpath).extract_first()
    if not fulltext:
        raise Exception(f'fulltext not found: {provider_url}')

    down_date_str = time.strftime('%Y%m%d_%H%M%S', time.localtime())
    sub_db_id = '99663'
    rawid = callmodel.sql_model.rawid
    lngid = BaseLngid().GetLngid(sub_db_id, rawid)
    product = "CZTGANSU"
    zt_provider = "cztgansugovpolicy"
    data = init_data(rawid, lngid, sub_db_id, down_date_str, product, zt_provider)
    print(lngid)

    data['title'] = title
    data['provider_url'] = provider_url
    data['pub_date'] = pub_date  # already cleaned above
    data['pub_year'] = pub_year

    save_data.append({'table': 'policy_latest', 'data': data})
    full_text_data = init_full_text_data(lngid, sub_db_id, down_date_str, fulltext, pub_year)
    save_data.append({'table': 'policy_fulltext_latest', 'data': full_text_data})
    result.save_data = save_data

    # Record attachment info (if any) back onto the source row.
    file_info = get_file_info(data, res, f'({fulltext_xpath})')
    di_model_bef = DealUpdateModel()
    if file_info:
        di_model_bef.update.update({"other_dicts": json.dumps(file_info, ensure_ascii=False)})
    else:
        di_model_bef.update.update({"other_dicts": "{}"})
    di_model_bef.where.update({"rawid": callmodel.sql_model.rawid, "task_tag": callmodel.sql_model.task_tag,
                               "task_name": callmodel.sql_model.task_name})
    result.befor_dicts.update_list.append(di_model_bef)
    return result


def policy_rstgansuarticle_etl_callback(callmodel) -> EtlDealModel:
    """ETL callback for Gansu RST policy articles (sub_db_id 99664).

    Parses the downloaded article page, extracts title/full text, queues
    rows for ``policy_latest`` / ``policy_fulltext_latest`` and writes
    attachment info back onto the originating task row.

    :raises Exception: when the full-text container is missing from the HTML.
    """
    result = EtlDealModel()
    save_data = list()

    html = callmodel.para_dicts['data']['1_1']['html']
    article_json = json.loads(callmodel.sql_model.article_json)
    provider_url = article_json['url']
    pub_date = clean_pubdate(article_json['pub_date'])  # normalized once here
    pub_year = pub_date[:4]
    res = Selector(text=html)

    # Prefer the on-page title; fall back to the title captured at list time.
    title = ''.join(res.xpath('//div[@class="new-title"]/div[@class="new-text"]//text()').extract()).strip()
    if not title:
        title = article_json['title'].strip()

    fulltext_xpath = '//div[@class="new-detail"]'
    fulltext = res.xpath(fulltext_xpath).extract_first()
    if not fulltext:
        raise Exception(f'fulltext not found: {provider_url}')

    down_date_str = time.strftime('%Y%m%d_%H%M%S', time.localtime())
    sub_db_id = '99664'
    rawid = callmodel.sql_model.rawid
    lngid = BaseLngid().GetLngid(sub_db_id, rawid)
    product = "RSTGANSU"
    zt_provider = "rstgansugovpolicy"
    data = init_data(rawid, lngid, sub_db_id, down_date_str, product, zt_provider)
    print(lngid)

    data['title'] = title
    data['provider_url'] = provider_url
    data['pub_date'] = pub_date  # already cleaned above
    data['pub_year'] = pub_year

    save_data.append({'table': 'policy_latest', 'data': data})
    full_text_data = init_full_text_data(lngid, sub_db_id, down_date_str, fulltext, pub_year)
    save_data.append({'table': 'policy_fulltext_latest', 'data': full_text_data})
    result.save_data = save_data

    # Record attachment info (if any) back onto the source row.
    file_info = get_file_info(data, res, f'({fulltext_xpath})')
    di_model_bef = DealUpdateModel()
    if file_info:
        di_model_bef.update.update({"other_dicts": json.dumps(file_info, ensure_ascii=False)})
    else:
        di_model_bef.update.update({"other_dicts": "{}"})
    di_model_bef.where.update({"rawid": callmodel.sql_model.rawid, "task_tag": callmodel.sql_model.task_tag,
                               "task_name": callmodel.sql_model.task_name})
    result.befor_dicts.update_list.append(di_model_bef)
    return result


def policy_nyncgansuarticle_etl_callback(callmodel) -> EtlDealModel:
    """ETL callback for Gansu NYNC policy articles (sub_db_id 99665).

    Uses the list-time title as-is (no on-page title extraction here),
    extracts the full text, queues rows for ``policy_latest`` /
    ``policy_fulltext_latest`` and writes attachment info back onto the
    originating task row.

    :raises Exception: when the full-text container is missing from the HTML.
    """
    result = EtlDealModel()
    save_data = list()

    html = callmodel.para_dicts['data']['1_1']['html']
    article_json = json.loads(callmodel.sql_model.article_json)
    title = article_json['title']  # list-time title; no page-level override
    provider_url = article_json['url']
    pub_date = clean_pubdate(article_json['pub_date'])  # normalized once here
    pub_year = pub_date[:4]
    res = Selector(text=html)

    fulltext_xpath = '//div[@class="nr-03"]|//td[@class="bt_content"]'
    fulltext = res.xpath(fulltext_xpath).extract_first()
    if not fulltext:
        raise Exception(f'fulltext not found: {provider_url}')

    down_date_str = time.strftime('%Y%m%d_%H%M%S', time.localtime())
    sub_db_id = '99665'
    rawid = callmodel.sql_model.rawid
    lngid = BaseLngid().GetLngid(sub_db_id, rawid)
    product = "NYNCGANSU"
    zt_provider = "nyncgansugovpolicy"
    data = init_data(rawid, lngid, sub_db_id, down_date_str, product, zt_provider)
    print(lngid)

    data['title'] = title
    data['provider_url'] = provider_url
    data['pub_date'] = pub_date  # already cleaned above
    data['pub_year'] = pub_year

    save_data.append({'table': 'policy_latest', 'data': data})
    full_text_data = init_full_text_data(lngid, sub_db_id, down_date_str, fulltext, pub_year)
    save_data.append({'table': 'policy_fulltext_latest', 'data': full_text_data})
    result.save_data = save_data

    # Record attachment info (if any) back onto the source row.
    file_info = get_file_info(data, res, f'({fulltext_xpath})')
    di_model_bef = DealUpdateModel()
    if file_info:
        di_model_bef.update.update({"other_dicts": json.dumps(file_info, ensure_ascii=False)})
    else:
        di_model_bef.update.update({"other_dicts": "{}"})
    di_model_bef.where.update({"rawid": callmodel.sql_model.rawid, "task_tag": callmodel.sql_model.task_tag,
                               "task_name": callmodel.sql_model.task_name})
    result.befor_dicts.update_list.append(di_model_bef)
    return result


def policy_zjtgansuarticle_etl_callback(callmodel) -> EtlDealModel:
    """ETL callback for Gansu ZJT policy articles (sub_db_id 99666).

    Parses the downloaded article page, extracts title/full text, queues
    rows for ``policy_latest`` / ``policy_fulltext_latest`` and writes
    attachment info back onto the originating task row.

    :raises Exception: when the full-text container is missing from the HTML.
    """
    result = EtlDealModel()
    save_data = list()

    html = callmodel.para_dicts['data']['1_1']['html']
    article_json = json.loads(callmodel.sql_model.article_json)
    provider_url = article_json['url']
    pub_date = clean_pubdate(article_json['pub_date'])  # normalized once here
    pub_year = pub_date[:4]
    res = Selector(text=html)

    # Prefer the on-page title; fall back to the title captured at list time.
    title = ''.join(res.xpath('//p[@id="title"]//text()').extract()).strip()
    if not title:
        title = article_json['title'].strip()

    fulltext_xpath = '//div[@id="detailcontent"]'
    fulltext = res.xpath(fulltext_xpath).extract_first()
    if not fulltext:
        raise Exception(f'fulltext not found: {provider_url}')

    down_date_str = time.strftime('%Y%m%d_%H%M%S', time.localtime())
    sub_db_id = '99666'
    rawid = callmodel.sql_model.rawid
    lngid = BaseLngid().GetLngid(sub_db_id, rawid)
    product = "ZJTGANSU"
    zt_provider = "zjtgansugovpolicy"
    data = init_data(rawid, lngid, sub_db_id, down_date_str, product, zt_provider)
    print(lngid)

    data['title'] = title
    data['provider_url'] = provider_url
    data['pub_date'] = pub_date  # already cleaned above
    data['pub_year'] = pub_year

    save_data.append({'table': 'policy_latest', 'data': data})
    full_text_data = init_full_text_data(lngid, sub_db_id, down_date_str, fulltext, pub_year)
    save_data.append({'table': 'policy_fulltext_latest', 'data': full_text_data})
    result.save_data = save_data

    # Record attachment info (if any) back onto the source row.
    file_info = get_file_info(data, res, f'({fulltext_xpath})')
    di_model_bef = DealUpdateModel()
    if file_info:
        di_model_bef.update.update({"other_dicts": json.dumps(file_info, ensure_ascii=False)})
    else:
        di_model_bef.update.update({"other_dicts": "{}"})
    di_model_bef.where.update({"rawid": callmodel.sql_model.rawid, "task_tag": callmodel.sql_model.task_tag,
                               "task_name": callmodel.sql_model.task_name})
    result.befor_dicts.update_list.append(di_model_bef)
    return result


def policy_wsjkgansuarticle_etl_callback(callmodel) -> EtlDealModel:
    """ETL callback for Gansu Health Commission policy article pages.

    Parses the crawled detail-page HTML, extracts the title, publish date
    and full text, and emits rows for the ``policy_latest`` and
    ``policy_fulltext_latest`` tables, plus an update that writes any
    attachment info back onto the source task row.

    :param callmodel: task model; reads ``para_dicts['data']['1_1']['html']``
        (page HTML) and ``sql_model`` (rawid, article_json, task_tag,
        task_name).
    :return: populated :class:`EtlDealModel`.
    :raises Exception: if the full-text container cannot be located.
    """
    result = EtlDealModel()
    save_data = list()

    html = callmodel.para_dicts['data']['1_1']['html']
    article_json = json.loads(callmodel.sql_model.article_json)
    provider_url = article_json['url']
    pub_date = clean_pubdate(article_json['pub_date'])
    pub_year = pub_date[:4]
    res = Selector(text=html)

    # Prefer the title rendered on the detail page; fall back to the
    # list-page title captured in article_json.
    title = ''.join(res.xpath('//div[@class="m_new_detail_title"]/div/h2//text()').extract()).strip()
    if not title:
        title = article_json['title'].strip()

    fulltext_xpath = '//div[@id="contents"]'
    fulltext = res.xpath(fulltext_xpath).extract_first()
    if not fulltext:
        # Fail with context instead of a bare ``raise Exception`` so the
        # failure is diagnosable from task logs.
        raise Exception(
            f"fulltext not found ({fulltext_xpath}) rawid={callmodel.sql_model.rawid}")

    down_date_str = time.strftime('%Y%m%d_%H%M%S', time.localtime())
    sub_db_id = '99667'
    rawid = callmodel.sql_model.rawid
    lngid = BaseLngid().GetLngid(sub_db_id, rawid)
    product = "WSJKGANSU"
    zt_provider = "wsjkgansugovpolicy"
    data = init_data(rawid, lngid, sub_db_id, down_date_str, product, zt_provider)
    print(lngid)

    data['title'] = title
    data['provider_url'] = provider_url
    data['pub_date'] = clean_pubdate(pub_date)
    data['pub_year'] = pub_year

    save_data.append({'table': 'policy_latest', 'data': data})
    full_text_data = init_full_text_data(lngid, sub_db_id, down_date_str, fulltext, pub_year)
    save_data.append({'table': 'policy_fulltext_latest', 'data': full_text_data})
    result.save_data = save_data

    # Persist attachment metadata (if any) back onto the article row so a
    # later download stage can pick it up; "{}" keeps the column non-null.
    file_info = get_file_info(data, res, f'({fulltext_xpath})')
    di_model_bef = DealUpdateModel()
    if file_info:
        di_model_bef.update.update({"other_dicts": json.dumps(file_info, ensure_ascii=False)})
    else:
        di_model_bef.update.update({"other_dicts": "{}"})
    di_model_bef.where.update({"rawid": callmodel.sql_model.rawid, "task_tag": callmodel.sql_model.task_tag,
                               "task_name": callmodel.sql_model.task_name})
    result.befor_dicts.update_list.append(di_model_bef)
    return result


def policy_lanzhouarticle_etl_callback(callmodel) -> EtlDealModel:
    """ETL callback for Lanzhou municipal government policy article pages.

    Parses the crawled detail-page HTML, extracts the title plus the
    structured metadata fields (document number, index number, subject,
    keywords, issuing organ, written date) and the full text, and emits
    rows for ``policy_latest`` / ``policy_fulltext_latest`` together with
    an update that writes attachment info back onto the source task row.

    :param callmodel: task model; reads ``para_dicts['data']['1_1']['html']``
        (page HTML) and ``sql_model`` (rawid, article_json, task_tag,
        task_name).
    :return: populated :class:`EtlDealModel`.
    :raises Exception: if the full-text container cannot be located.
    """
    result = EtlDealModel()
    save_data = list()

    html = callmodel.para_dicts['data']['1_1']['html']
    article_json = json.loads(callmodel.sql_model.article_json)
    provider_url = article_json['url']
    pub_date = clean_pubdate(article_json['pub_date'])
    pub_year = pub_date[:4]
    res = Selector(text=html)

    # Prefer the on-page title (two page layouts are in use); fall back
    # to the list-page title captured in article_json.
    title = ''.join(res.xpath('//td[@class="title"]//text()|//div[@class="zfxxgk_pageTit"]//text()').extract()).strip()
    if not title:
        title = article_json['title'].strip()
    pub_no = ''.join(res.xpath('//div[@class="contenttitle"]//font[contains(text(),"发文字号")]/span/text()').extract()).strip()
    index_no = ''.join(res.xpath('//div[@class="contenttitle"]//font[contains(text(),"索")]/span/text()').extract()).strip()
    subject = ''.join(res.xpath('//div[@class="contenttitle"]//font[contains(text(),"主题分类")]/span/text()').extract()).strip()
    subject_word = ''.join(res.xpath('//div[@class="contenttitle"]//font[contains(text(),"主") and contains(text(),"词")]/span/text()').extract()).strip()
    organ = ''.join(res.xpath('//div[@class="contenttitle"]//font[contains(text(),"发文机关")]/span/text()').extract()).strip()
    written_date = ''.join(res.xpath('//div[@class="contenttitle"]//font[contains(text(),"成文日期")]/span/text()').extract()).strip()
    # Pages abbreviate the city name ("市..."); qualify with "兰州".
    if organ.startswith('市'):
        organ = '兰州' + organ
    fulltext_xpath = '//div[@id="zoom"]'
    fulltext = res.xpath(fulltext_xpath).extract_first()
    if not fulltext:
        # Fail with context instead of a bare ``raise Exception`` so the
        # failure is diagnosable from task logs.
        raise Exception(
            f"fulltext not found ({fulltext_xpath}) rawid={callmodel.sql_model.rawid}")

    down_date_str = time.strftime('%Y%m%d_%H%M%S', time.localtime())
    sub_db_id = '99668'
    rawid = callmodel.sql_model.rawid
    lngid = BaseLngid().GetLngid(sub_db_id, rawid)
    product = "LANZHOU"
    zt_provider = "lanzhougovpolicy"
    data = init_data(rawid, lngid, sub_db_id, down_date_str, product, zt_provider)
    print(lngid)

    data['title'] = title
    data['provider_url'] = provider_url
    data['pub_date'] = clean_pubdate(pub_date)
    data['pub_year'] = pub_year
    data['pub_no'] = pub_no
    data['organ'] = organ
    data['index_no'] = index_no
    data['written_date'] = clean_pubdate(written_date)
    data['subject'] = subject
    data['subject_word'] = subject_word

    save_data.append({'table': 'policy_latest', 'data': data})
    full_text_data = init_full_text_data(lngid, sub_db_id, down_date_str, fulltext, pub_year)
    save_data.append({'table': 'policy_fulltext_latest', 'data': full_text_data})
    result.save_data = save_data

    # Persist attachment metadata (if any) back onto the article row so a
    # later download stage can pick it up; "{}" keeps the column non-null.
    file_info = get_file_info(data, res, f'({fulltext_xpath})')
    di_model_bef = DealUpdateModel()
    if file_info:
        di_model_bef.update.update({"other_dicts": json.dumps(file_info, ensure_ascii=False)})
    else:
        di_model_bef.update.update({"other_dicts": "{}"})
    di_model_bef.where.update({"rawid": callmodel.sql_model.rawid, "task_tag": callmodel.sql_model.task_tag,
                               "task_name": callmodel.sql_model.task_name})
    result.befor_dicts.update_list.append(di_model_bef)
    return result


def policy_cztshaanxiarticle_etl_callback(callmodel) -> EtlDealModel:
    """ETL callback for Shaanxi Department of Finance policy article pages.

    Parses the crawled detail-page HTML, extracts the title, publish date
    (with a byline fallback), index number, issuing organ, written date and
    the full text, and emits rows for ``policy_latest`` /
    ``policy_fulltext_latest`` together with an update that writes
    attachment info back onto the source task row.

    :param callmodel: task model; reads ``para_dicts['data']['1_1']['html']``
        (page HTML) and ``sql_model`` (rawid, article_json, task_tag,
        task_name).
    :return: populated :class:`EtlDealModel`.
    :raises Exception: if the title, publish date or full-text container
        cannot be located.
    """
    result = EtlDealModel()
    save_data = list()

    html = callmodel.para_dicts['data']['1_1']['html']
    article_json = json.loads(callmodel.sql_model.article_json)
    provider_url = article_json['url']
    res = Selector(text=html)

    # Two page layouts are in use; both title XPaths are tried at once.
    title = ''.join(res.xpath('//h6[@class="text-c"]//text()|//div[@class="title"]/h1//text()').extract()).strip()
    if not title:
        raise Exception(f"title not found rawid={callmodel.sql_model.rawid}")

    # Primary source: the metadata table; fall back to the byline under
    # the title when the table is absent.
    pub_date_info = ''.join(res.xpath('//table//td[contains(text(),"发布时间")]/following::td[1]/text()').extract()).strip()
    pub_date = clean_pubdate(pub_date_info)
    if not pub_date:
        pub_date_info = ''.join(res.xpath('//h6[@class="text-c"]/parent::div/p/text()|//div[@class="title"]/p/text()').extract()).strip()
        pub_date = clean_pubdate(pub_date_info)
    if not pub_date:
        raise Exception(f"pub_date not found rawid={callmodel.sql_model.rawid}")
    # Derive the year once, after pub_date is known to be non-empty.
    pub_year = pub_date[:4]

    index_no = ''.join(res.xpath('//table//td[contains(text(),"索引号")]/following::td[1]/text()').extract()).strip()
    organ = ''.join(res.xpath('//table//td[contains(text(),"发文机构")]/following::td[1]/text()').extract()).strip()
    written_date = ''.join(res.xpath('//table//td[contains(text(),"发文日期")]/following::td[1]/text()').extract()).strip()
    # Pages abbreviate the province name ("省..."); qualify with "陕西".
    if organ.startswith('省'):
        organ = '陕西' + organ
    fulltext_xpath = '//div[@id="vsb_newscontent"]|//div[@class="w-contents infoContent"]'
    fulltext = res.xpath(fulltext_xpath).extract_first()
    if not fulltext:
        # Fail with context instead of a bare ``raise Exception`` so the
        # failure is diagnosable from task logs.
        raise Exception(
            f"fulltext not found ({fulltext_xpath}) rawid={callmodel.sql_model.rawid}")

    down_date_str = time.strftime('%Y%m%d_%H%M%S', time.localtime())
    sub_db_id = '99625'
    rawid = callmodel.sql_model.rawid
    lngid = BaseLngid().GetLngid(sub_db_id, rawid)
    product = "CZTSHAANXI"
    zt_provider = "cztshaanxigovpolicy"
    data = init_data(rawid, lngid, sub_db_id, down_date_str, product, zt_provider)
    print(lngid)

    data['title'] = title
    data['provider_url'] = provider_url
    data['pub_date'] = clean_pubdate(pub_date)
    data['pub_year'] = pub_year
    data['organ'] = organ
    data['index_no'] = index_no
    data['written_date'] = clean_pubdate(written_date)

    save_data.append({'table': 'policy_latest', 'data': data})
    full_text_data = init_full_text_data(lngid, sub_db_id, down_date_str, fulltext, pub_year)
    save_data.append({'table': 'policy_fulltext_latest', 'data': full_text_data})
    result.save_data = save_data

    # Persist attachment metadata (if any) back onto the article row so a
    # later download stage can pick it up; "{}" keeps the column non-null.
    file_info = get_file_info(data, res, f'({fulltext_xpath})')
    di_model_bef = DealUpdateModel()
    if file_info:
        di_model_bef.update.update({"other_dicts": json.dumps(file_info, ensure_ascii=False)})
    else:
        di_model_bef.update.update({"other_dicts": "{}"})
    di_model_bef.where.update({"rawid": callmodel.sql_model.rawid, "task_tag": callmodel.sql_model.task_tag,
                               "task_name": callmodel.sql_model.task_name})
    result.befor_dicts.update_list.append(di_model_bef)
    return result

